Mirror of https://github.com/rustfs/rustfs.git (synced 2026-01-17 01:30:33 +00:00)
add rio/filemeta
@@ -19,6 +19,9 @@ members = [
"s3select/api", # S3 Select API interface
"s3select/query", # S3 Select query engine
"crates/zip",
"crates/filemeta",
"crates/rio",

]
resolver = "2"

@@ -53,6 +56,8 @@ rustfs-config = { path = "./crates/config", version = "0.0.1" }
rustfs-obs = { path = "crates/obs", version = "0.0.1" }
rustfs-event-notifier = { path = "crates/event-notifier", version = "0.0.1" }
rustfs-utils = { path = "crates/utils", version = "0.0.1" }
rustfs-rio = { path = "crates/rio", version = "0.0.1" }
rustfs-filemeta = { path = "crates/filemeta", version = "0.0.1" }
workers = { path = "./common/workers", version = "0.0.1" }
tokio-tar = "0.3.1"
atoi = "2.0.0"

32 crates/filemeta/Cargo.toml Normal file
@@ -0,0 +1,32 @@
[package]
name = "rustfs-filemeta"
edition.workspace = true
license.workspace = true
repository.workspace = true
rust-version.workspace = true
version.workspace = true

[dependencies]
crc32fast = "1.4.2"
rmp.workspace = true
rmp-serde.workspace = true
serde.workspace = true
time.workspace = true
uuid = { workspace = true, features = ["v4", "fast-rng", "serde"] }
tokio = { workspace = true, features = ["io-util", "macros", "sync"] }
xxhash-rust = { version = "0.8.15", features = ["xxh64"] }

rustfs-utils = { workspace = true, features = ["hash"] }
byteorder = "1.5.0"
tracing.workspace = true
thiserror.workspace = true

[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }

[[bench]]
name = "xl_meta_bench"
harness = false

[lints]
workspace = true
238 crates/filemeta/README.md Normal file
@@ -0,0 +1,238 @@
# RustFS FileMeta

A high-performance Rust implementation of xl-storage-format-v2, fully compatible with the xl.meta metadata format used by S3-compatible object stores, while offering enhanced performance and safety.

## Overview

This crate implements the XL (erasure-coded) metadata format used for distributed object storage. It provides:

- **Full S3 Compatibility**: 100% compatible with the xl.meta file format
- **High Performance**: Optimized for speed with sub-microsecond parsing times
- **Memory Safety**: Written in safe Rust with comprehensive error handling
- **Comprehensive Testing**: Extensive test suite with real metadata validation
- **Cross-Platform**: Supports multiple CPU architectures (x86_64, aarch64)

## Features

### Core Functionality
- ✅ XL v2 file format parsing and serialization
- ✅ MessagePack-based metadata encoding/decoding
- ✅ Version management with modification-time sorting
- ✅ Erasure coding information storage
- ✅ Inline data support for small objects
- ✅ Integrity verification via xxHash64 checksums
- ✅ Delete marker handling
- ✅ Legacy version support

### Advanced Features
- ✅ Signature calculation for version integrity
- ✅ Metadata validation and compatibility checking
- ✅ Version statistics and analytics
- ✅ Async I/O support with tokio
- ✅ Comprehensive error handling
- ✅ Performance benchmarking

## Performance

Based on our benchmarks:

| Operation | Time | Description |
|-----------|------|-------------|
| Parse Real xl.meta | ~255 ns | Parse authentic xl metadata |
| Parse Complex xl.meta | ~1.1 µs | Parse multi-version metadata |
| Serialize Real xl.meta | ~659 ns | Serialize to xl format |
| Round-trip Real xl.meta | ~1.3 µs | Parse + serialize cycle |
| Version Statistics | ~5.2 ns | Calculate version stats |
| Integrity Validation | ~7.8 ns | Validate metadata integrity |

## Usage

### Basic Usage

```rust
use rustfs_filemeta::FileMeta;

// Load metadata from bytes
let metadata = FileMeta::load(&xl_meta_bytes)?;

// Access version information
for version in &metadata.versions {
    println!("Version ID: {:?}", version.header.version_id);
    println!("Mod Time: {:?}", version.header.mod_time);
}

// Serialize back to bytes
let serialized = metadata.marshal_msg()?;
```

### Advanced Usage

```rust
use rustfs_filemeta::FileMeta;

// Load with validation
let mut metadata = FileMeta::load(&xl_meta_bytes)?;

// Validate integrity
metadata.validate_integrity()?;

// Check xl format compatibility
if metadata.is_compatible_with_meta() {
    println!("Compatible with xl format");
}

// Get version statistics
let stats = metadata.get_version_stats();
println!("Total versions: {}", stats.total_versions);
println!("Object versions: {}", stats.object_versions);
println!("Delete markers: {}", stats.delete_markers);
```

### Working with FileInfo

```rust
use rustfs_filemeta::{FileInfo, FileMetaVersion};

// Build a FileInfo for a 4+2 erasure layout and convert it to a metadata version
let file_info = FileInfo::new("object.txt", 4, 2);
let meta_version = FileMetaVersion::from(file_info.clone());

// Add the version to existing metadata
metadata.add_version(file_info)?;
```

## Data Structures

### FileMeta
The main metadata container that holds all versions and inline data:

```rust
pub struct FileMeta {
    pub versions: Vec<FileMetaShallowVersion>,
    pub data: InlineData,
    pub meta_ver: u8,
}
```

### FileMetaVersion
Represents a single object version:

```rust
pub struct FileMetaVersion {
    pub version_type: VersionType,
    pub object: Option<MetaObject>,
    pub delete_marker: Option<MetaDeleteMarker>,
    pub write_version: u64,
}
```

### MetaObject
Contains object-specific metadata including erasure coding information:

```rust
pub struct MetaObject {
    pub version_id: Option<Uuid>,
    pub data_dir: Option<Uuid>,
    pub erasure_algorithm: ErasureAlgo,
    pub erasure_m: usize,
    pub erasure_n: usize,
    // ... additional fields
}
```

## File Format Compatibility

This implementation is fully compatible with xl-storage-format-v2:

- **Header Format**: XL2 v1 format with proper version checking
- **Serialization**: MessagePack encoding identical to the standard format
- **Checksums**: xxHash64-based CRC validation
- **Version Types**: Support for Object, Delete, and Legacy versions
- **Inline Data**: Compatible inline data storage for small objects
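A minimal sketch of how the format check can be combined with a full parse (illustrative only; it assumes a buffer read from an `xl.meta` file and uses the `is_xl2_v1_format` helper and `load` constructor exposed by this crate):

```rust
use rustfs_filemeta::FileMeta;

/// Returns true when `buf` carries the XL2 v1 header and also parses cleanly.
fn looks_like_xl_meta(buf: &[u8]) -> bool {
    // Cheap check first: only the format/version header is inspected.
    if !FileMeta::is_xl2_v1_format(buf) {
        return false;
    }
    // A full parse additionally walks the versions and their checksums.
    FileMeta::load(buf).is_ok()
}
```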
## Testing

The crate includes comprehensive tests with real xl metadata:

```bash
# Run all tests
cargo test

# Run benchmarks
cargo bench

# Run with coverage
cargo test --features coverage
```

### Test Coverage
- ✅ Real xl.meta file compatibility
- ✅ Complex multi-version scenarios
- ✅ Error handling and recovery
- ✅ Inline data processing
- ✅ Signature calculation
- ✅ Round-trip serialization
- ✅ Performance benchmarks
- ✅ Edge cases and boundary conditions

## Architecture

The crate follows a modular design:

```
src/
├── lib.rs              # Public API exports
├── error.rs            # Error type and conversions
├── filemeta.rs         # Core metadata structures and logic
├── filemeta_inline.rs  # Inline data handling
├── fileinfo.rs         # File information structures
├── headers.rs          # Metadata header/key constants
├── metacache.rs        # Metadata cache entries and streaming
└── test_data.rs        # Test data generation
```

## Error Handling

Comprehensive error handling with detailed error messages:

```rust
use rustfs_filemeta::error::Error;

match FileMeta::load(&invalid_data) {
    Ok(metadata) => { /* process metadata */ },
    Err(Error::FileCorrupt) => {
        eprintln!("Corrupt xl.meta data");
    },
    Err(Error::RmpSerdeDecode(msg)) => {
        eprintln!("Decode error: {}", msg);
    },
    Err(e) => {
        eprintln!("Other error: {}", e);
    }
}
```

## Dependencies

- `rmp` / `rmp-serde` - MessagePack serialization
- `uuid` - UUID handling
- `time` - Date/time operations
- `xxhash-rust` - Fast hashing
- `tokio` - Async I/O runtime
- `criterion` - Benchmarking (dev dependency)

## Contributing

1. Fork the repository
2. Create a feature branch
3. Add tests for new functionality
4. Ensure all tests pass
5. Submit a pull request

## License

This project is licensed under the Apache License 2.0 - see the LICENSE file for details.

## Acknowledgments

- Original xl-storage-format-v2 implementation contributors
- Rust community for excellent crates and tooling
- Contributors and testers who helped improve this implementation
95 crates/filemeta/benches/xl_meta_bench.rs Normal file
@@ -0,0 +1,95 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rustfs_filemeta::{test_data::*, FileMeta};

fn bench_create_real_xlmeta(c: &mut Criterion) {
    c.bench_function("create_real_xlmeta", |b| b.iter(|| black_box(create_real_xlmeta().unwrap())));
}

fn bench_create_complex_xlmeta(c: &mut Criterion) {
    c.bench_function("create_complex_xlmeta", |b| b.iter(|| black_box(create_complex_xlmeta().unwrap())));
}

fn bench_parse_real_xlmeta(c: &mut Criterion) {
    let data = create_real_xlmeta().unwrap();

    c.bench_function("parse_real_xlmeta", |b| b.iter(|| black_box(FileMeta::load(&data).unwrap())));
}

fn bench_parse_complex_xlmeta(c: &mut Criterion) {
    let data = create_complex_xlmeta().unwrap();

    c.bench_function("parse_complex_xlmeta", |b| b.iter(|| black_box(FileMeta::load(&data).unwrap())));
}

fn bench_serialize_real_xlmeta(c: &mut Criterion) {
    let data = create_real_xlmeta().unwrap();
    let fm = FileMeta::load(&data).unwrap();

    c.bench_function("serialize_real_xlmeta", |b| b.iter(|| black_box(fm.marshal_msg().unwrap())));
}

fn bench_serialize_complex_xlmeta(c: &mut Criterion) {
    let data = create_complex_xlmeta().unwrap();
    let fm = FileMeta::load(&data).unwrap();

    c.bench_function("serialize_complex_xlmeta", |b| b.iter(|| black_box(fm.marshal_msg().unwrap())));
}

fn bench_round_trip_real_xlmeta(c: &mut Criterion) {
    let original_data = create_real_xlmeta().unwrap();

    c.bench_function("round_trip_real_xlmeta", |b| {
        b.iter(|| {
            let fm = FileMeta::load(&original_data).unwrap();
            let serialized = fm.marshal_msg().unwrap();
            black_box(FileMeta::load(&serialized).unwrap())
        })
    });
}

fn bench_round_trip_complex_xlmeta(c: &mut Criterion) {
    let original_data = create_complex_xlmeta().unwrap();

    c.bench_function("round_trip_complex_xlmeta", |b| {
        b.iter(|| {
            let fm = FileMeta::load(&original_data).unwrap();
            let serialized = fm.marshal_msg().unwrap();
            black_box(FileMeta::load(&serialized).unwrap())
        })
    });
}

fn bench_version_stats(c: &mut Criterion) {
    let data = create_complex_xlmeta().unwrap();
    let fm = FileMeta::load(&data).unwrap();

    c.bench_function("version_stats", |b| b.iter(|| black_box(fm.get_version_stats())));
}

fn bench_validate_integrity(c: &mut Criterion) {
    let data = create_real_xlmeta().unwrap();
    let fm = FileMeta::load(&data).unwrap();

    c.bench_function("validate_integrity", |b| {
        b.iter(|| {
            fm.validate_integrity().unwrap();
            black_box(())
        })
    });
}

criterion_group!(
    benches,
    bench_create_real_xlmeta,
    bench_create_complex_xlmeta,
    bench_parse_real_xlmeta,
    bench_parse_complex_xlmeta,
    bench_serialize_real_xlmeta,
    bench_serialize_complex_xlmeta,
    bench_round_trip_real_xlmeta,
    bench_round_trip_complex_xlmeta,
    bench_version_stats,
    bench_validate_integrity
);

criterion_main!(benches);
139 crates/filemeta/src/error.rs Normal file
@@ -0,0 +1,139 @@
pub type Result<T> = core::result::Result<T, Error>;

#[derive(thiserror::Error, Debug, Clone)]
pub enum Error {
    #[error("File not found")]
    FileNotFound,
    #[error("File version not found")]
    FileVersionNotFound,

    #[error("File corrupt")]
    FileCorrupt,

    #[error("Done for now")]
    DoneForNow,

    #[error("Method not allowed")]
    MethodNotAllowed,

    #[error("I/O error: {0}")]
    Io(String),

    #[error("rmp serde decode error: {0}")]
    RmpSerdeDecode(String),

    #[error("rmp serde encode error: {0}")]
    RmpSerdeEncode(String),

    #[error("Invalid UTF-8: {0}")]
    FromUtf8(String),

    #[error("rmp decode value read error: {0}")]
    RmpDecodeValueRead(String),

    #[error("rmp encode value write error: {0}")]
    RmpEncodeValueWrite(String),

    #[error("rmp decode num value read error: {0}")]
    RmpDecodeNumValueRead(String),

    #[error("rmp decode marker read error: {0}")]
    RmpDecodeMarkerRead(String),

    #[error("time component range error: {0}")]
    TimeComponentRange(String),

    #[error("uuid parse error: {0}")]
    UuidParse(String),
}

impl Error {
    pub fn other<E>(error: E) -> Error
    where
        E: Into<Box<dyn std::error::Error + Send + Sync>>,
    {
        std::io::Error::other(error).into()
    }
}

impl PartialEq for Error {
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Error::FileCorrupt, Error::FileCorrupt) => true,
            (Error::DoneForNow, Error::DoneForNow) => true,
            (Error::MethodNotAllowed, Error::MethodNotAllowed) => true,
            (Error::FileNotFound, Error::FileNotFound) => true,
            (Error::FileVersionNotFound, Error::FileVersionNotFound) => true,
            (Error::Io(e1), Error::Io(e2)) => e1 == e2,
            (Error::RmpSerdeDecode(e1), Error::RmpSerdeDecode(e2)) => e1 == e2,
            (Error::RmpSerdeEncode(e1), Error::RmpSerdeEncode(e2)) => e1 == e2,
            (Error::RmpDecodeValueRead(e1), Error::RmpDecodeValueRead(e2)) => e1 == e2,
            (Error::RmpEncodeValueWrite(e1), Error::RmpEncodeValueWrite(e2)) => e1 == e2,
            (Error::RmpDecodeNumValueRead(e1), Error::RmpDecodeNumValueRead(e2)) => e1 == e2,
            (Error::TimeComponentRange(e1), Error::TimeComponentRange(e2)) => e1 == e2,
            (Error::UuidParse(e1), Error::UuidParse(e2)) => e1 == e2,
            (a, b) => a.to_string() == b.to_string(),
        }
    }
}

impl From<std::io::Error> for Error {
    fn from(e: std::io::Error) -> Self {
        Error::Io(e.to_string())
    }
}

impl From<rmp_serde::decode::Error> for Error {
    fn from(e: rmp_serde::decode::Error) -> Self {
        Error::RmpSerdeDecode(e.to_string())
    }
}

impl From<rmp_serde::encode::Error> for Error {
    fn from(e: rmp_serde::encode::Error) -> Self {
        Error::RmpSerdeEncode(e.to_string())
    }
}

impl From<std::string::FromUtf8Error> for Error {
    fn from(e: std::string::FromUtf8Error) -> Self {
        Error::FromUtf8(e.to_string())
    }
}

impl From<rmp::decode::ValueReadError> for Error {
    fn from(e: rmp::decode::ValueReadError) -> Self {
        Error::RmpDecodeValueRead(e.to_string())
    }
}

impl From<rmp::encode::ValueWriteError> for Error {
    fn from(e: rmp::encode::ValueWriteError) -> Self {
        Error::RmpEncodeValueWrite(e.to_string())
    }
}

impl From<rmp::decode::NumValueReadError> for Error {
    fn from(e: rmp::decode::NumValueReadError) -> Self {
        Error::RmpDecodeNumValueRead(e.to_string())
    }
}

impl From<time::error::ComponentRange> for Error {
    fn from(e: time::error::ComponentRange) -> Self {
        Error::TimeComponentRange(e.to_string())
    }
}

impl From<uuid::Error> for Error {
    fn from(e: uuid::Error) -> Self {
        Error::UuidParse(e.to_string())
    }
}

impl From<rmp::decode::MarkerReadError> for Error {
    fn from(e: rmp::decode::MarkerReadError) -> Self {
        let serr = format!("{:?}", e);
        Error::RmpDecodeMarkerRead(serr)
    }
}
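// Illustrative test sketch (assumes only the APIs defined above plus the
// workspace `rmp` dependency): shows how the `From` conversions compose with
// `?`, and how `Error::other` routes anything error-like into `Error::Io`.
#[cfg(test)]
mod conversion_sketch {
    use super::*;

    fn read_map_header(buf: &[u8]) -> Result<u32> {
        let mut cur = std::io::Cursor::new(buf);
        // `read_map_len` yields `rmp::decode::ValueReadError`; the `From`
        // impl above lets `?` convert it into `Error::RmpDecodeValueRead`.
        let len = rmp::decode::read_map_len(&mut cur)?;
        Ok(len)
    }

    #[test]
    fn decode_errors_convert_into_error() {
        // 0x81 is a MessagePack fixmap of length 1.
        assert_eq!(read_map_header(&[0x81]).unwrap(), 1);
        // An empty buffer cannot contain a map header, so this must fail.
        assert!(read_map_header(&[]).is_err());
        // `Error::other` wraps plain strings via `std::io::Error::other`.
        assert!(matches!(Error::other("something went wrong"), Error::Io(_)));
    }
}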
438 crates/filemeta/src/fileinfo.rs Normal file
@@ -0,0 +1,438 @@
use crate::error::{Error, Result};
|
||||
use rmp_serde::Serializer;
|
||||
use rustfs_utils::HashAlgorithm;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::headers::RESERVED_METADATA_PREFIX;
|
||||
use crate::headers::RUSTFS_HEALING;
|
||||
use crate::headers::X_RUSTFS_INLINE_DATA;
|
||||
|
||||
pub const ERASURE_ALGORITHM: &str = "rs-vandermonde";
|
||||
pub const BLOCK_SIZE_V2: usize = 1024 * 1024; // 1M
|
||||
|
||||
// Additional constants from Go version
|
||||
pub const NULL_VERSION_ID: &str = "null";
|
||||
// pub const RUSTFS_ERASURE_UPGRADED: &str = "x-rustfs-internal-erasure-upgraded";
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, PartialEq, Clone, Default)]
|
||||
pub struct ObjectPartInfo {
|
||||
pub etag: String,
|
||||
pub number: usize,
|
||||
pub size: usize,
|
||||
pub actual_size: usize, // Original data size
|
||||
pub mod_time: Option<OffsetDateTime>,
|
||||
// Index holds the index of the part in the erasure coding
|
||||
pub index: Option<Vec<u8>>,
|
||||
// Checksums holds checksums of the part
|
||||
pub checksums: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, PartialEq, Default, Clone)]
|
||||
// ChecksumInfo - carries checksums of individual scattered parts per disk.
|
||||
pub struct ChecksumInfo {
|
||||
pub part_number: usize,
|
||||
pub algorithm: HashAlgorithm,
|
||||
pub hash: Vec<u8>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Default, Clone)]
|
||||
pub enum ErasureAlgo {
|
||||
#[default]
|
||||
Invalid = 0,
|
||||
ReedSolomon = 1,
|
||||
}
|
||||
|
||||
impl ErasureAlgo {
|
||||
pub fn valid(&self) -> bool {
|
||||
*self > ErasureAlgo::Invalid
|
||||
}
|
||||
pub fn to_u8(&self) -> u8 {
|
||||
match self {
|
||||
ErasureAlgo::Invalid => 0,
|
||||
ErasureAlgo::ReedSolomon => 1,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_u8(u: u8) -> Self {
|
||||
match u {
|
||||
1 => ErasureAlgo::ReedSolomon,
|
||||
_ => ErasureAlgo::Invalid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ErasureAlgo {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
ErasureAlgo::Invalid => write!(f, "Invalid"),
|
||||
ErasureAlgo::ReedSolomon => write!(f, "{}", ERASURE_ALGORITHM),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, PartialEq, Default, Clone)]
|
||||
// ErasureInfo holds erasure coding and bitrot related information.
|
||||
pub struct ErasureInfo {
|
||||
// Algorithm is the String representation of erasure-coding-algorithm
|
||||
pub algorithm: String,
|
||||
// DataBlocks is the number of data blocks for erasure-coding
|
||||
pub data_blocks: usize,
|
||||
// ParityBlocks is the number of parity blocks for erasure-coding
|
||||
pub parity_blocks: usize,
|
||||
// BlockSize is the size of one erasure-coded block
|
||||
pub block_size: usize,
|
||||
// Index is the index of the current disk
|
||||
pub index: usize,
|
||||
// Distribution is the distribution of the data and parity blocks
|
||||
pub distribution: Vec<usize>,
|
||||
// Checksums holds all bitrot checksums of all erasure encoded blocks
|
||||
pub checksums: Vec<ChecksumInfo>,
|
||||
}
|
||||
|
||||
impl ErasureInfo {
|
||||
pub fn get_checksum_info(&self, part_number: usize) -> ChecksumInfo {
|
||||
for sum in &self.checksums {
|
||||
if sum.part_number == part_number {
|
||||
return sum.clone();
|
||||
}
|
||||
}
|
||||
|
||||
ChecksumInfo {
|
||||
algorithm: HashAlgorithm::HighwayHash256S,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate the size of each shard.
|
||||
pub fn shard_size(&self) -> usize {
|
||||
self.block_size.div_ceil(self.data_blocks)
|
||||
}
|
||||
/// Calculate the total erasure file size for a given original size.
|
||||
// Returns the final erasure size from the original size
|
||||
pub fn shard_file_size(&self, total_length: usize) -> usize {
|
||||
if total_length == 0 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let num_shards = total_length / self.block_size;
|
||||
let last_block_size = total_length % self.block_size;
|
||||
let last_shard_size = last_block_size.div_ceil(self.data_blocks);
|
||||
num_shards * self.shard_size() + last_shard_size
|
||||
}
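// Worked example (for exposition): with the default 1 MiB block size and
// 4 data blocks, shard_size() = ceil(1_048_576 / 4) = 262_144 bytes.
// For an object of 2.5 MiB (2_621_440 bytes):
//   full blocks      = 2_621_440 / 1_048_576 = 2
//   last block bytes = 2_621_440 % 1_048_576 = 524_288
//   last shard       = ceil(524_288 / 4)     = 131_072
//   shard_file_size  = 2 * 262_144 + 131_072 = 655_360 bytes per data shard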
|
||||
|
||||
/// Check if this ErasureInfo equals another ErasureInfo
|
||||
pub fn equals(&self, other: &ErasureInfo) -> bool {
|
||||
self.algorithm == other.algorithm
|
||||
&& self.data_blocks == other.data_blocks
|
||||
&& self.parity_blocks == other.parity_blocks
|
||||
&& self.block_size == other.block_size
|
||||
&& self.index == other.index
|
||||
&& self.distribution == other.distribution
|
||||
}
|
||||
}
|
||||
|
||||
// #[derive(Debug, Clone)]
|
||||
#[derive(Serialize, Deserialize, Debug, PartialEq, Clone, Default)]
|
||||
pub struct FileInfo {
|
||||
pub volume: String,
|
||||
pub name: String,
|
||||
pub version_id: Option<Uuid>,
|
||||
pub is_latest: bool,
|
||||
pub deleted: bool,
|
||||
// Transition related fields
|
||||
pub transition_status: Option<String>,
|
||||
pub transitioned_obj_name: Option<String>,
|
||||
pub transition_tier: Option<String>,
|
||||
pub transition_version_id: Option<String>,
|
||||
pub expire_restored: bool,
|
||||
pub data_dir: Option<Uuid>,
|
||||
pub mod_time: Option<OffsetDateTime>,
|
||||
pub size: usize,
|
||||
// File mode bits
|
||||
pub mode: Option<u32>,
|
||||
// WrittenByVersion is the unix time stamp of the version that created this version of the object
|
||||
pub written_by_version: Option<u64>,
|
||||
pub metadata: HashMap<String, String>,
|
||||
pub parts: Vec<ObjectPartInfo>,
|
||||
pub erasure: ErasureInfo,
|
||||
// MarkDeleted marks this version as deleted
|
||||
pub mark_deleted: bool,
|
||||
// ReplicationState - Internal replication state to be passed back in ObjectInfo
|
||||
// pub replication_state: Option<ReplicationState>, // TODO: implement ReplicationState
|
||||
pub data: Option<Vec<u8>>,
|
||||
pub num_versions: usize,
|
||||
pub successor_mod_time: Option<OffsetDateTime>,
|
||||
pub fresh: bool,
|
||||
pub idx: usize,
|
||||
// Combined checksum when object was uploaded
|
||||
pub checksum: Option<Vec<u8>>,
|
||||
pub versioned: bool,
|
||||
}
|
||||
|
||||
impl FileInfo {
|
||||
pub fn new(object: &str, data_blocks: usize, parity_blocks: usize) -> Self {
|
||||
let indexs = {
|
||||
let cardinality = data_blocks + parity_blocks;
|
||||
let mut nums = vec![0; cardinality];
|
||||
let key_crc = crc32fast::hash(object.as_bytes());
|
||||
|
||||
let start = key_crc as usize % cardinality;
|
||||
for i in 1..=cardinality {
|
||||
nums[i - 1] = 1 + ((start + i) % cardinality);
|
||||
}
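// For exposition: with cardinality 4 and start 1 this yields [3, 4, 1, 2],
// a 1-based rotation of the disk indices seeded by the CRC32 of the object
// name, so every object gets a deterministic but spread-out block placement.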
|
||||
|
||||
nums
|
||||
};
|
||||
Self {
|
||||
erasure: ErasureInfo {
|
||||
algorithm: String::from(ERASURE_ALGORITHM),
|
||||
data_blocks,
|
||||
parity_blocks,
|
||||
block_size: BLOCK_SIZE_V2,
|
||||
distribution: indexs,
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_valid(&self) -> bool {
|
||||
if self.deleted {
|
||||
return true;
|
||||
}
|
||||
|
||||
let data_blocks = self.erasure.data_blocks;
|
||||
let parity_blocks = self.erasure.parity_blocks;
|
||||
|
||||
(data_blocks >= parity_blocks)
|
||||
&& (data_blocks > 0)
|
||||
&& (self.erasure.index > 0
|
||||
&& self.erasure.index <= data_blocks + parity_blocks
|
||||
&& self.erasure.distribution.len() == (data_blocks + parity_blocks))
|
||||
}
|
||||
|
||||
pub fn get_etag(&self) -> Option<String> {
|
||||
self.metadata.get("etag").cloned()
|
||||
}
|
||||
|
||||
pub fn write_quorum(&self, quorum: usize) -> usize {
|
||||
if self.deleted {
|
||||
return quorum;
|
||||
}
|
||||
|
||||
if self.erasure.data_blocks == self.erasure.parity_blocks {
|
||||
return self.erasure.data_blocks + 1;
|
||||
}
|
||||
|
||||
self.erasure.data_blocks
|
||||
}
|
||||
|
||||
pub fn marshal_msg(&self) -> Result<Vec<u8>> {
|
||||
let mut buf = Vec::new();
|
||||
|
||||
self.serialize(&mut Serializer::new(&mut buf))?;
|
||||
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
pub fn unmarshal(buf: &[u8]) -> Result<Self> {
|
||||
let t: FileInfo = rmp_serde::from_slice(buf)?;
|
||||
Ok(t)
|
||||
}
|
||||
|
||||
pub fn add_object_part(
|
||||
&mut self,
|
||||
num: usize,
|
||||
etag: String,
|
||||
part_size: usize,
|
||||
mod_time: Option<OffsetDateTime>,
|
||||
actual_size: usize,
|
||||
) {
|
||||
let part = ObjectPartInfo {
|
||||
etag,
|
||||
number: num,
|
||||
size: part_size,
|
||||
mod_time,
|
||||
actual_size,
|
||||
index: None,
|
||||
checksums: None,
|
||||
};
|
||||
|
||||
for p in self.parts.iter_mut() {
|
||||
if p.number == num {
|
||||
*p = part;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
self.parts.push(part);
|
||||
|
||||
self.parts.sort_by(|a, b| a.number.cmp(&b.number));
|
||||
}
|
||||
|
||||
// to_part_offset gets the part index where offset is located, returns part index and offset
|
||||
pub fn to_part_offset(&self, offset: usize) -> Result<(usize, usize)> {
|
||||
if offset == 0 {
|
||||
return Ok((0, 0));
|
||||
}
|
||||
|
||||
let mut part_offset = offset;
|
||||
for (i, part) in self.parts.iter().enumerate() {
|
||||
let part_index = i;
|
||||
if part_offset < part.size {
|
||||
return Ok((part_index, part_offset));
|
||||
}
|
||||
|
||||
part_offset -= part.size
|
||||
}
|
||||
|
||||
Err(Error::other("part not found"))
|
||||
}
|
||||
|
||||
pub fn set_healing(&mut self) {
|
||||
self.metadata.insert(RUSTFS_HEALING.to_string(), "true".to_string());
|
||||
}
|
||||
|
||||
pub fn set_inline_data(&mut self) {
|
||||
self.metadata.insert(X_RUSTFS_INLINE_DATA.to_owned(), "true".to_owned());
|
||||
}
|
||||
pub fn inline_data(&self) -> bool {
|
||||
self.metadata.get(X_RUSTFS_INLINE_DATA).is_some_and(|v| v == "true")
|
||||
}
|
||||
|
||||
/// Check if the object is compressed
|
||||
pub fn is_compressed(&self) -> bool {
|
||||
self.metadata
|
||||
.contains_key(&format!("{}compression", RESERVED_METADATA_PREFIX))
|
||||
}
|
||||
|
||||
/// Check if the object is remote (transitioned to another tier)
|
||||
pub fn is_remote(&self) -> bool {
|
||||
!self.transition_tier.as_ref().map_or(true, |s| s.is_empty())
|
||||
}
|
||||
|
||||
/// Get the data directory for this object
|
||||
pub fn get_data_dir(&self) -> String {
|
||||
if self.deleted {
|
||||
return "delete-marker".to_string();
|
||||
}
|
||||
self.data_dir.map_or("".to_string(), |dir| dir.to_string())
|
||||
}
|
||||
|
||||
/// Read quorum returns expected read quorum for this FileInfo
|
||||
pub fn read_quorum(&self, dquorum: usize) -> usize {
|
||||
if self.deleted {
|
||||
return dquorum;
|
||||
}
|
||||
self.erasure.data_blocks
|
||||
}
|
||||
|
||||
/// Create a shallow copy with minimal information for READ MRF checks
|
||||
pub fn shallow_copy(&self) -> Self {
|
||||
Self {
|
||||
volume: self.volume.clone(),
|
||||
name: self.name.clone(),
|
||||
version_id: self.version_id,
|
||||
deleted: self.deleted,
|
||||
erasure: self.erasure.clone(),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if this FileInfo equals another FileInfo
|
||||
pub fn equals(&self, other: &FileInfo) -> bool {
|
||||
// Check if both are compressed or both are not compressed
|
||||
if self.is_compressed() != other.is_compressed() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check transition info
|
||||
if !self.transition_info_equals(other) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check mod time
|
||||
if self.mod_time != other.mod_time {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check erasure info
|
||||
self.erasure.equals(&other.erasure)
|
||||
}
|
||||
|
||||
/// Check if transition related information are equal
|
||||
pub fn transition_info_equals(&self, other: &FileInfo) -> bool {
|
||||
self.transition_status == other.transition_status
|
||||
&& self.transition_tier == other.transition_tier
|
||||
&& self.transitioned_obj_name == other.transitioned_obj_name
|
||||
&& self.transition_version_id == other.transition_version_id
|
||||
}
|
||||
|
||||
/// Check if metadata maps are equal
|
||||
pub fn metadata_equals(&self, other: &FileInfo) -> bool {
|
||||
if self.metadata.len() != other.metadata.len() {
|
||||
return false;
|
||||
}
|
||||
for (k, v) in &self.metadata {
|
||||
if other.metadata.get(k) != Some(v) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Check if replication related fields are equal
|
||||
pub fn replication_info_equals(&self, other: &FileInfo) -> bool {
|
||||
self.mark_deleted == other.mark_deleted
|
||||
// TODO: Add replication_state comparison when implemented
|
||||
// && self.replication_state == other.replication_state
|
||||
}
|
||||
}
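// Illustrative usage sketch (assumes only the APIs defined above): shows how
// `FileInfo::new`, `add_object_part` and `to_part_offset` fit together for a
// 4 data + 2 parity erasure layout.
#[cfg(test)]
mod fileinfo_sketch {
    use super::*;

    #[test]
    fn parts_and_offsets() {
        // 4 data blocks + 2 parity blocks; the distribution is derived from
        // a CRC of the object name, so it is deterministic per object.
        let mut fi = FileInfo::new("bucket/object.txt", 4, 2);
        assert_eq!(fi.erasure.distribution.len(), 6);

        // Two parts of 5 MiB and 1 MiB.
        fi.add_object_part(1, "etag-1".to_string(), 5 << 20, None, 5 << 20);
        fi.add_object_part(2, "etag-2".to_string(), 1 << 20, None, 1 << 20);

        // An absolute offset of 5 MiB + 1 falls one byte into the second part.
        let (part_idx, in_part) = fi.to_part_offset((5 << 20) + 1).unwrap();
        assert_eq!((part_idx, in_part), (1, 1));

        // Each 1 MiB erasure block is split into 4 data shards of 256 KiB.
        assert_eq!(fi.erasure.shard_size(), 256 * 1024);
    }
}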
|
||||
|
||||
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
|
||||
pub struct FileInfoVersions {
|
||||
// Name of the volume.
|
||||
pub volume: String,
|
||||
|
||||
// Name of the file.
|
||||
pub name: String,
|
||||
|
||||
// Represents the latest mod time of the
|
||||
// latest version.
|
||||
pub latest_mod_time: Option<OffsetDateTime>,
|
||||
|
||||
pub versions: Vec<FileInfo>,
|
||||
pub free_versions: Vec<FileInfo>,
|
||||
}
|
||||
|
||||
impl FileInfoVersions {
|
||||
pub fn find_version_index(&self, v: &str) -> Option<usize> {
|
||||
if v.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let vid = Uuid::parse_str(v).unwrap_or_default();
|
||||
|
||||
self.versions.iter().position(|v| v.version_id == Some(vid))
|
||||
}
|
||||
|
||||
/// Calculate the total size of all versions for this object
|
||||
pub fn size(&self) -> usize {
|
||||
self.versions.iter().map(|v| v.size).sum()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Serialize, Deserialize)]
|
||||
pub struct RawFileInfo {
|
||||
pub buf: Vec<u8>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
|
||||
pub struct FilesInfo {
|
||||
pub files: Vec<FileInfo>,
|
||||
pub is_truncated: bool,
|
||||
}
|
||||
3411 crates/filemeta/src/filemeta.rs Normal file
(File diff suppressed because it is too large.)
242 crates/filemeta/src/filemeta_inline.rs Normal file
@@ -0,0 +1,242 @@
use crate::error::{Error, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::io::{Cursor, Read};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
|
||||
pub struct InlineData(Vec<u8>);
|
||||
|
||||
const INLINE_DATA_VER: u8 = 1;
|
||||
|
||||
impl InlineData {
|
||||
pub fn new() -> Self {
|
||||
Self(Vec::new())
|
||||
}
|
||||
pub fn update(&mut self, buf: &[u8]) {
|
||||
self.0 = buf.to_vec()
|
||||
}
|
||||
pub fn as_slice(&self) -> &[u8] {
|
||||
self.0.as_slice()
|
||||
}
|
||||
pub fn version_ok(&self) -> bool {
|
||||
if self.0.is_empty() {
|
||||
return true;
|
||||
}
|
||||
|
||||
self.0[0] > 0 && self.0[0] <= INLINE_DATA_VER
|
||||
}
|
||||
|
||||
pub fn after_version(&self) -> &[u8] {
|
||||
if self.0.is_empty() {
|
||||
&self.0
|
||||
} else {
|
||||
&self.0[1..]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find(&self, key: &str) -> Result<Option<Vec<u8>>> {
|
||||
if self.0.is_empty() || !self.version_ok() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let buf = self.after_version();
|
||||
|
||||
let mut cur = Cursor::new(buf);
|
||||
|
||||
let mut fields_len = rmp::decode::read_map_len(&mut cur)?;
|
||||
|
||||
while fields_len > 0 {
|
||||
fields_len -= 1;
|
||||
|
||||
let str_len = rmp::decode::read_str_len(&mut cur)?;
|
||||
|
||||
let mut field_buff = vec![0u8; str_len as usize];
|
||||
|
||||
cur.read_exact(&mut field_buff)?;
|
||||
|
||||
let field = String::from_utf8(field_buff)?;
|
||||
|
||||
let bin_len = rmp::decode::read_bin_len(&mut cur)? as usize;
|
||||
let start = cur.position() as usize;
|
||||
let end = start + bin_len;
|
||||
cur.set_position(end as u64);
|
||||
|
||||
if field.as_str() == key {
|
||||
let buf = &buf[start..end];
|
||||
return Ok(Some(buf.to_vec()));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub fn validate(&self) -> Result<()> {
|
||||
if self.0.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut cur = Cursor::new(self.after_version());
|
||||
|
||||
let mut fields_len = rmp::decode::read_map_len(&mut cur)?;
|
||||
|
||||
while fields_len > 0 {
|
||||
fields_len -= 1;
|
||||
|
||||
let str_len = rmp::decode::read_str_len(&mut cur)?;
|
||||
|
||||
let mut field_buff = vec![0u8; str_len as usize];
|
||||
|
||||
cur.read_exact(&mut field_buff)?;
|
||||
|
||||
let field = String::from_utf8(field_buff)?;
|
||||
if field.is_empty() {
|
||||
return Err(Error::other("InlineData key empty"));
|
||||
}
|
||||
|
||||
let bin_len = rmp::decode::read_bin_len(&mut cur)? as usize;
|
||||
let start = cur.position() as usize;
|
||||
let end = start + bin_len;
|
||||
cur.set_position(end as u64);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn replace(&mut self, key: &str, value: Vec<u8>) -> Result<()> {
|
||||
if self.after_version().is_empty() {
|
||||
let mut keys = Vec::with_capacity(1);
|
||||
let mut values = Vec::with_capacity(1);
|
||||
|
||||
keys.push(key.to_owned());
|
||||
values.push(value);
|
||||
|
||||
return self.serialize(keys, values);
|
||||
}
|
||||
|
||||
let buf = self.after_version();
|
||||
let mut cur = Cursor::new(buf);
|
||||
|
||||
let mut fields_len = rmp::decode::read_map_len(&mut cur)? as usize;
|
||||
let mut keys = Vec::with_capacity(fields_len + 1);
|
||||
let mut values = Vec::with_capacity(fields_len + 1);
|
||||
|
||||
let mut replaced = false;
|
||||
|
||||
while fields_len > 0 {
|
||||
fields_len -= 1;
|
||||
|
||||
let str_len = rmp::decode::read_str_len(&mut cur)?;
|
||||
|
||||
let mut field_buff = vec![0u8; str_len as usize];
|
||||
|
||||
cur.read_exact(&mut field_buff)?;
|
||||
|
||||
let find_key = String::from_utf8(field_buff)?;
|
||||
|
||||
let bin_len = rmp::decode::read_bin_len(&mut cur)? as usize;
|
||||
let start = cur.position() as usize;
|
||||
let end = start + bin_len;
|
||||
cur.set_position(end as u64);
|
||||
|
||||
let find_value = &buf[start..end];
|
||||
|
||||
if find_key.as_str() == key {
|
||||
values.push(value.clone());
|
||||
replaced = true
|
||||
} else {
|
||||
values.push(find_value.to_vec());
|
||||
}
|
||||
|
||||
keys.push(find_key);
|
||||
}
|
||||
|
||||
if !replaced {
|
||||
keys.push(key.to_owned());
|
||||
values.push(value);
|
||||
}
|
||||
|
||||
self.serialize(keys, values)
|
||||
}
|
||||
pub fn remove(&mut self, remove_keys: Vec<Uuid>) -> Result<bool> {
|
||||
let buf = self.after_version();
|
||||
let mut cur = Cursor::new(buf);
|
||||
|
||||
let mut fields_len = rmp::decode::read_map_len(&mut cur)? as usize;
|
||||
let mut keys = Vec::with_capacity(fields_len + 1);
|
||||
let mut values = Vec::with_capacity(fields_len + 1);
|
||||
|
||||
let remove_key = |found_key: &str| {
|
||||
for key in remove_keys.iter() {
|
||||
if key.to_string().as_str() == found_key {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
};
|
||||
|
||||
let mut found = false;
|
||||
|
||||
while fields_len > 0 {
|
||||
fields_len -= 1;
|
||||
|
||||
let str_len = rmp::decode::read_str_len(&mut cur)?;
|
||||
|
||||
let mut field_buff = vec![0u8; str_len as usize];
|
||||
|
||||
cur.read_exact(&mut field_buff)?;
|
||||
|
||||
let find_key = String::from_utf8(field_buff)?;
|
||||
|
||||
let bin_len = rmp::decode::read_bin_len(&mut cur)? as usize;
|
||||
let start = cur.position() as usize;
|
||||
let end = start + bin_len;
|
||||
cur.set_position(end as u64);
|
||||
|
||||
let find_value = &buf[start..end];
|
||||
|
||||
if !remove_key(&find_key) {
|
||||
values.push(find_value.to_vec());
|
||||
keys.push(find_key);
|
||||
} else {
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
if keys.is_empty() {
|
||||
self.0 = Vec::new();
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
self.serialize(keys, values)?;
|
||||
Ok(true)
|
||||
}
|
||||
fn serialize(&mut self, keys: Vec<String>, values: Vec<Vec<u8>>) -> Result<()> {
|
||||
assert_eq!(keys.len(), values.len(), "InlineData serialize: keys/values not match");
|
||||
|
||||
if keys.is_empty() {
|
||||
self.0 = Vec::new();
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut wr = Vec::new();
|
||||
|
||||
wr.push(INLINE_DATA_VER);
|
||||
|
||||
let map_len = keys.len();
|
||||
|
||||
rmp::encode::write_map_len(&mut wr, map_len as u32)?;
|
||||
|
||||
for i in 0..map_len {
|
||||
rmp::encode::write_str(&mut wr, keys[i].as_str())?;
|
||||
rmp::encode::write_bin(&mut wr, values[i].as_slice())?;
|
||||
}
|
||||
|
||||
self.0 = wr;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
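// Illustrative round-trip sketch (assumes only the APIs defined above): keys
// are version-id strings, values are raw bytes stored inline in xl.meta.
#[cfg(test)]
mod inline_data_sketch {
    use super::*;
    use uuid::Uuid;

    #[test]
    fn replace_find_remove_round_trip() {
        let ver = Uuid::new_v4();
        let mut inline = InlineData::new();

        // `replace` inserts when the key is absent and overwrites otherwise.
        inline.replace(&ver.to_string(), b"hello".to_vec()).unwrap();
        assert!(inline.version_ok());
        inline.validate().unwrap();

        // `find` returns the stored bytes for a known key, `None` otherwise.
        assert_eq!(inline.find(&ver.to_string()).unwrap(), Some(b"hello".to_vec()));
        assert_eq!(inline.find("missing").unwrap(), None);

        // `remove` drops entries by version id and reports whether any matched.
        assert!(inline.remove(vec![ver]).unwrap());
        assert!(inline.as_slice().is_empty());
    }
}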
|
||||
17 crates/filemeta/src/headers.rs Normal file
@@ -0,0 +1,17 @@
pub const AMZ_META_UNENCRYPTED_CONTENT_LENGTH: &str = "X-Amz-Meta-X-Amz-Unencrypted-Content-Length";
pub const AMZ_META_UNENCRYPTED_CONTENT_MD5: &str = "X-Amz-Meta-X-Amz-Unencrypted-Content-Md5";

pub const AMZ_STORAGE_CLASS: &str = "x-amz-storage-class";

pub const RESERVED_METADATA_PREFIX: &str = "X-RustFS-Internal-";
pub const RESERVED_METADATA_PREFIX_LOWER: &str = "x-rustfs-internal-";

pub const RUSTFS_HEALING: &str = "X-Rustfs-Internal-healing";
// pub const RUSTFS_DATA_MOVE: &str = "X-Rustfs-Internal-data-mov";

pub const X_RUSTFS_INLINE_DATA: &str = "x-rustfs-inline-data";

pub const VERSION_PURGE_STATUS_KEY: &str = "X-Rustfs-Internal-purgestatus";

pub const X_RUSTFS_HEALING: &str = "X-Rustfs-Internal-healing";
pub const X_RUSTFS_DATA_MOV: &str = "X-Rustfs-Internal-data-mov";
13 crates/filemeta/src/lib.rs Normal file
@@ -0,0 +1,13 @@
mod error;
mod fileinfo;
mod filemeta;
mod filemeta_inline;
mod headers;
mod metacache;

pub mod test_data;

pub use fileinfo::*;
pub use filemeta::*;
pub use filemeta_inline::*;
pub use metacache::*;
874 crates/filemeta/src/metacache.rs Normal file
@@ -0,0 +1,874 @@
use crate::error::{Error, Result};
|
||||
use crate::{merge_file_meta_versions, FileInfo, FileInfoVersions, FileMeta, FileMetaShallowVersion, VersionType};
|
||||
use rmp::Marker;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::cmp::Ordering;
|
||||
use std::str::from_utf8;
|
||||
use std::{
|
||||
fmt::Debug,
|
||||
future::Future,
|
||||
pin::Pin,
|
||||
ptr,
|
||||
sync::{
|
||||
atomic::{AtomicPtr, AtomicU64, Ordering as AtomicOrdering},
|
||||
Arc,
|
||||
},
|
||||
time::{Duration, SystemTime, UNIX_EPOCH},
|
||||
};
|
||||
use time::OffsetDateTime;
|
||||
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
|
||||
use tokio::spawn;
|
||||
use tokio::sync::Mutex;
|
||||
use tracing::warn;
|
||||
|
||||
const SLASH_SEPARATOR: &str = "/";
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct MetadataResolutionParams {
|
||||
pub dir_quorum: usize,
|
||||
pub obj_quorum: usize,
|
||||
pub requested_versions: usize,
|
||||
pub bucket: String,
|
||||
pub strict: bool,
|
||||
pub candidates: Vec<Vec<FileMetaShallowVersion>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
|
||||
pub struct MetaCacheEntry {
|
||||
/// name is the full name of the object including prefixes
|
||||
pub name: String,
|
||||
/// Metadata. If none is present it is not an object but only a prefix.
|
||||
/// Entries without metadata will only be present in non-recursive scans.
|
||||
pub metadata: Vec<u8>,
|
||||
|
||||
/// cached contains the metadata if decoded.
|
||||
#[serde(skip)]
|
||||
pub cached: Option<FileMeta>,
|
||||
|
||||
/// Indicates the entry can be reused and only one reference to metadata is expected.
|
||||
pub reusable: bool,
|
||||
}
|
||||
|
||||
impl MetaCacheEntry {
|
||||
pub fn marshal_msg(&self) -> Result<Vec<u8>> {
|
||||
let mut wr = Vec::new();
|
||||
rmp::encode::write_bool(&mut wr, true)?;
|
||||
rmp::encode::write_str(&mut wr, &self.name)?;
|
||||
rmp::encode::write_bin(&mut wr, &self.metadata)?;
|
||||
Ok(wr)
|
||||
}
|
||||
|
||||
pub fn is_dir(&self) -> bool {
|
||||
self.metadata.is_empty() && self.name.ends_with('/')
|
||||
}
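// For exposition: an entry is "in" `dir` when, after stripping the `dir`
// prefix, the remainder contains no separator except possibly a trailing one.
// E.g. with dir = "bucket/": "bucket/obj" and "bucket/sub/" match, while
// "bucket/sub/obj" does not (it lives one level deeper).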
|
||||
|
||||
pub fn is_in_dir(&self, dir: &str, separator: &str) -> bool {
|
||||
if dir.is_empty() {
|
||||
let idx = self.name.find(separator);
|
||||
return idx.is_none() || idx.unwrap() == self.name.len() - separator.len();
|
||||
}
|
||||
|
||||
let ext = self.name.trim_start_matches(dir);
|
||||
|
||||
if ext.len() != self.name.len() {
|
||||
let idx = ext.find(separator);
|
||||
return idx.is_none() || idx.unwrap() == ext.len() - separator.len();
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
pub fn is_object(&self) -> bool {
|
||||
!self.metadata.is_empty()
|
||||
}
|
||||
|
||||
pub fn is_object_dir(&self) -> bool {
|
||||
!self.metadata.is_empty() && self.name.ends_with(SLASH_SEPARATOR)
|
||||
}
|
||||
|
||||
pub fn is_latest_delete_marker(&mut self) -> bool {
|
||||
if let Some(cached) = &self.cached {
|
||||
if cached.versions.is_empty() {
|
||||
return true;
|
||||
}
|
||||
return cached.versions[0].header.version_type == VersionType::Delete;
|
||||
}
|
||||
|
||||
if !FileMeta::is_xl2_v1_format(&self.metadata) {
|
||||
return false;
|
||||
}
|
||||
|
||||
match FileMeta::check_xl2_v1(&self.metadata) {
|
||||
Ok((meta, _, _)) => {
|
||||
if !meta.is_empty() {
|
||||
return FileMeta::is_latest_delete_marker(meta);
|
||||
}
|
||||
}
|
||||
Err(_) => return true,
|
||||
}
|
||||
|
||||
match self.xl_meta() {
|
||||
Ok(res) => {
|
||||
if res.versions.is_empty() {
|
||||
return true;
|
||||
}
|
||||
res.versions[0].header.version_type == VersionType::Delete
|
||||
}
|
||||
Err(_) => true,
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug", skip(self))]
|
||||
pub fn to_fileinfo(&self, bucket: &str) -> Result<FileInfo> {
|
||||
if self.is_dir() {
|
||||
return Ok(FileInfo {
|
||||
volume: bucket.to_owned(),
|
||||
name: self.name.clone(),
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
|
||||
if self.cached.is_some() {
|
||||
let fm = self.cached.as_ref().unwrap();
|
||||
if fm.versions.is_empty() {
|
||||
return Ok(FileInfo {
|
||||
volume: bucket.to_owned(),
|
||||
name: self.name.clone(),
|
||||
deleted: true,
|
||||
is_latest: true,
|
||||
mod_time: Some(OffsetDateTime::UNIX_EPOCH),
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
|
||||
let fi = fm.into_fileinfo(bucket, self.name.as_str(), "", false, false)?;
|
||||
return Ok(fi);
|
||||
}
|
||||
|
||||
let mut fm = FileMeta::new();
|
||||
fm.unmarshal_msg(&self.metadata)?;
|
||||
let fi = fm.into_fileinfo(bucket, self.name.as_str(), "", false, false)?;
|
||||
Ok(fi)
|
||||
}
|
||||
|
||||
pub fn file_info_versions(&self, bucket: &str) -> Result<FileInfoVersions> {
|
||||
if self.is_dir() {
|
||||
return Ok(FileInfoVersions {
|
||||
volume: bucket.to_string(),
|
||||
name: self.name.clone(),
|
||||
versions: vec![FileInfo {
|
||||
volume: bucket.to_string(),
|
||||
name: self.name.clone(),
|
||||
..Default::default()
|
||||
}],
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
|
||||
let mut fm = FileMeta::new();
|
||||
fm.unmarshal_msg(&self.metadata)?;
|
||||
fm.into_file_info_versions(bucket, self.name.as_str(), false)
|
||||
}
|
||||
|
||||
pub fn matches(&self, other: Option<&MetaCacheEntry>, strict: bool) -> (Option<MetaCacheEntry>, bool) {
|
||||
if other.is_none() {
|
||||
return (None, false);
|
||||
}
|
||||
|
||||
let other = other.unwrap();
|
||||
if self.name != other.name {
|
||||
if self.name < other.name {
|
||||
return (Some(self.clone()), false);
|
||||
}
|
||||
return (Some(other.clone()), false);
|
||||
}
|
||||
|
||||
if other.is_dir() || self.is_dir() {
|
||||
if self.is_dir() {
|
||||
return (Some(self.clone()), other.is_dir() == self.is_dir());
|
||||
}
|
||||
return (Some(other.clone()), other.is_dir() == self.is_dir());
|
||||
}
|
||||
|
||||
let self_vers = match &self.cached {
|
||||
Some(file_meta) => file_meta.clone(),
|
||||
None => match FileMeta::load(&self.metadata) {
|
||||
Ok(meta) => meta,
|
||||
Err(_) => return (None, false),
|
||||
},
|
||||
};
|
||||
|
||||
let other_vers = match &other.cached {
|
||||
Some(file_meta) => file_meta.clone(),
|
||||
None => match FileMeta::load(&other.metadata) {
|
||||
Ok(meta) => meta,
|
||||
Err(_) => return (None, false),
|
||||
},
|
||||
};
|
||||
|
||||
if self_vers.versions.len() != other_vers.versions.len() {
|
||||
match self_vers.lastest_mod_time().cmp(&other_vers.lastest_mod_time()) {
|
||||
Ordering::Greater => return (Some(self.clone()), false),
|
||||
Ordering::Less => return (Some(other.clone()), false),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
if self_vers.versions.len() > other_vers.versions.len() {
|
||||
return (Some(self.clone()), false);
|
||||
}
|
||||
return (Some(other.clone()), false);
|
||||
}
|
||||
|
||||
let mut prefer = None;
|
||||
for (s_version, o_version) in self_vers.versions.iter().zip(other_vers.versions.iter()) {
|
||||
if s_version.header != o_version.header {
|
||||
if s_version.header.has_ec() != o_version.header.has_ec() {
|
||||
// One version has EC and the other doesn't - may have been written later.
|
||||
// Compare without considering EC.
|
||||
let (mut a, mut b) = (s_version.header.clone(), o_version.header.clone());
|
||||
(a.ec_n, a.ec_m, b.ec_n, b.ec_m) = (0, 0, 0, 0);
|
||||
if a == b {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if !strict && s_version.header.matches_not_strict(&o_version.header) {
|
||||
if prefer.is_none() {
|
||||
if s_version.header.sorts_before(&o_version.header) {
|
||||
prefer = Some(self.clone());
|
||||
} else {
|
||||
prefer = Some(other.clone());
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if prefer.is_some() {
|
||||
return (prefer, false);
|
||||
}
|
||||
|
||||
if s_version.header.sorts_before(&o_version.header) {
|
||||
return (Some(self.clone()), false);
|
||||
}
|
||||
|
||||
return (Some(other.clone()), false);
|
||||
}
|
||||
}
|
||||
|
||||
if prefer.is_none() {
|
||||
prefer = Some(self.clone());
|
||||
}
|
||||
|
||||
(prefer, true)
|
||||
}
|
||||
|
||||
pub fn xl_meta(&mut self) -> Result<FileMeta> {
|
||||
if self.is_dir() {
|
||||
return Err(Error::FileNotFound);
|
||||
}
|
||||
|
||||
if let Some(meta) = &self.cached {
|
||||
Ok(meta.clone())
|
||||
} else {
|
||||
if self.metadata.is_empty() {
|
||||
return Err(Error::FileNotFound);
|
||||
}
|
||||
|
||||
let meta = FileMeta::load(&self.metadata)?;
|
||||
self.cached = Some(meta.clone());
|
||||
Ok(meta)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct MetaCacheEntries(pub Vec<Option<MetaCacheEntry>>);
|
||||
|
||||
impl MetaCacheEntries {
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn as_ref(&self) -> &[Option<MetaCacheEntry>] {
|
||||
&self.0
|
||||
}
|
||||
|
||||
pub fn resolve(&self, mut params: MetadataResolutionParams) -> Option<MetaCacheEntry> {
|
||||
if self.0.is_empty() {
|
||||
warn!("decommission_pool: entries resolve empty");
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut dir_exists = 0;
|
||||
let mut selected = None;
|
||||
|
||||
params.candidates.clear();
|
||||
let mut objs_agree = 0;
|
||||
let mut objs_valid = 0;
|
||||
|
||||
for entry in self.0.iter().flatten() {
|
||||
let mut entry = entry.clone();
|
||||
|
||||
warn!("decommission_pool: entries resolve entry {:?}", entry.name);
|
||||
if entry.name.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if entry.is_dir() {
|
||||
dir_exists += 1;
|
||||
selected = Some(entry.clone());
|
||||
warn!("decommission_pool: entries resolve entry dir {:?}", entry.name);
|
||||
continue;
|
||||
}
|
||||
|
||||
let xl = match entry.xl_meta() {
|
||||
Ok(xl) => xl,
|
||||
Err(e) => {
|
||||
warn!("decommission_pool: entries resolve entry xl_meta {:?}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
objs_valid += 1;
|
||||
params.candidates.push(xl.versions.clone());
|
||||
|
||||
if selected.is_none() {
|
||||
selected = Some(entry.clone());
|
||||
objs_agree = 1;
|
||||
warn!("decommission_pool: entries resolve entry selected {:?}", entry.name);
|
||||
continue;
|
||||
}
|
||||
|
||||
if let (prefer, true) = entry.matches(selected.as_ref(), params.strict) {
|
||||
selected = prefer;
|
||||
objs_agree += 1;
|
||||
warn!("decommission_pool: entries resolve entry prefer {:?}", entry.name);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let Some(selected) = selected else {
|
||||
warn!("decommission_pool: entries resolve entry no selected");
|
||||
return None;
|
||||
};
|
||||
|
||||
if selected.is_dir() && dir_exists >= params.dir_quorum {
|
||||
warn!("decommission_pool: entries resolve entry dir selected {:?}", selected.name);
|
||||
return Some(selected);
|
||||
}
|
||||
|
||||
// If we would never be able to reach read quorum.
|
||||
if objs_valid < params.obj_quorum {
|
||||
warn!(
|
||||
"decommission_pool: entries resolve entry not enough objects {} < {}",
|
||||
objs_valid, params.obj_quorum
|
||||
);
|
||||
return None;
|
||||
}
|
||||
|
||||
if objs_agree == objs_valid {
|
||||
warn!("decommission_pool: entries resolve entry all agree {} == {}", objs_agree, objs_valid);
|
||||
return Some(selected);
|
||||
}
|
||||
|
||||
let Some(cached) = selected.cached else {
|
||||
warn!("decommission_pool: entries resolve entry no cached");
|
||||
return None;
|
||||
};
|
||||
|
||||
let versions = merge_file_meta_versions(params.obj_quorum, params.strict, params.requested_versions, ¶ms.candidates);
|
||||
if versions.is_empty() {
|
||||
warn!("decommission_pool: entries resolve entry no versions");
|
||||
return None;
|
||||
}
|
||||
|
||||
let metadata = match cached.marshal_msg() {
|
||||
Ok(meta) => meta,
|
||||
Err(e) => {
|
||||
warn!("decommission_pool: entries resolve entry marshal_msg {:?}", e);
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
// Merge if we have disagreement.
|
||||
// Create a new merged result.
|
||||
let new_selected = MetaCacheEntry {
|
||||
name: selected.name.clone(),
|
||||
cached: Some(FileMeta {
|
||||
meta_ver: cached.meta_ver,
|
||||
versions,
|
||||
..Default::default()
|
||||
}),
|
||||
reusable: true,
|
||||
metadata,
|
||||
};
|
||||
|
||||
warn!("decommission_pool: entries resolve entry selected {:?}", new_selected.name);
|
||||
Some(new_selected)
|
||||
}
|
||||
|
||||
pub fn first_found(&self) -> (Option<MetaCacheEntry>, usize) {
|
||||
(self.0.iter().find(|x| x.is_some()).cloned().unwrap_or_default(), self.0.len())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct MetaCacheEntriesSortedResult {
|
||||
pub entries: Option<MetaCacheEntriesSorted>,
|
||||
pub err: Option<Error>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct MetaCacheEntriesSorted {
|
||||
pub o: MetaCacheEntries,
|
||||
pub list_id: Option<String>,
|
||||
pub reuse: bool,
|
||||
pub last_skipped_entry: Option<String>,
|
||||
}
|
||||
|
||||
impl MetaCacheEntriesSorted {
|
||||
pub fn entries(&self) -> Vec<&MetaCacheEntry> {
|
||||
let entries: Vec<&MetaCacheEntry> = self.o.0.iter().flatten().collect();
|
||||
entries
|
||||
}
|
||||
|
||||
pub fn forward_past(&mut self, marker: Option<String>) {
|
||||
if let Some(val) = marker {
|
||||
if let Some(idx) = self.o.0.iter().flatten().position(|v| v.name > val) {
|
||||
self.o.0 = self.o.0.split_off(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const METACACHE_STREAM_VERSION: u8 = 2;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct MetacacheWriter<W> {
|
||||
wr: W,
|
||||
created: bool,
|
||||
buf: Vec<u8>,
|
||||
}
|
||||
|
||||
impl<W: AsyncWrite + Unpin> MetacacheWriter<W> {
|
||||
pub fn new(wr: W) -> Self {
|
||||
Self {
|
||||
wr,
|
||||
created: false,
|
||||
buf: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn flush(&mut self) -> Result<()> {
|
||||
self.wr.write_all(&self.buf).await?;
|
||||
self.buf.clear();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn init(&mut self) -> Result<()> {
|
||||
if !self.created {
|
||||
rmp::encode::write_u8(&mut self.buf, METACACHE_STREAM_VERSION).map_err(|e| Error::other(format!("{:?}", e)))?;
|
||||
self.flush().await?;
|
||||
self.created = true;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn write(&mut self, objs: &[MetaCacheEntry]) -> Result<()> {
|
||||
if objs.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
self.init().await?;
|
||||
|
||||
for obj in objs.iter() {
|
||||
if obj.name.is_empty() {
|
||||
return Err(Error::other("metacacheWriter: no name"));
|
||||
}
|
||||
|
||||
self.write_obj(obj).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn write_obj(&mut self, obj: &MetaCacheEntry) -> Result<()> {
|
||||
self.init().await?;
|
||||
|
||||
rmp::encode::write_bool(&mut self.buf, true).map_err(|e| Error::other(format!("{:?}", e)))?;
|
||||
rmp::encode::write_str(&mut self.buf, &obj.name).map_err(|e| Error::other(format!("{:?}", e)))?;
|
||||
rmp::encode::write_bin(&mut self.buf, &obj.metadata).map_err(|e| Error::other(format!("{:?}", e)))?;
|
||||
self.flush().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn close(&mut self) -> Result<()> {
|
||||
rmp::encode::write_bool(&mut self.buf, false).map_err(|e| Error::other(format!("{:?}", e)))?;
|
||||
self.flush().await?;
|
||||
Ok(())
|
||||
}
|
||||
}
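// Illustrative writer sketch (assumes the APIs above; also assumes tokio's
// `rt` feature is enabled for the test profile so `#[tokio::test]` compiles).
// A `Vec<u8>` implements `AsyncWrite`, so the stream can be built in memory:
// a version byte, then (bool, name, metadata) triples, terminated by `false`.
#[cfg(test)]
mod writer_sketch {
    use super::*;

    #[tokio::test]
    async fn stream_starts_with_version() {
        let mut out = Vec::new();
        {
            let mut wr = MetacacheWriter::new(&mut out);
            wr.write(&[MetaCacheEntry {
                name: "bucket/object.txt".to_string(),
                metadata: vec![1, 2, 3],
                cached: None,
                reusable: false,
            }])
            .await
            .unwrap();
            wr.close().await.unwrap();
        }
        // The stream opens with the msgpack-encoded stream version (u8 = 2).
        assert_eq!(rmp::decode::read_u8(&mut &out[..]).unwrap(), METACACHE_STREAM_VERSION);
    }
}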
|
||||
|
||||
pub struct MetacacheReader<R> {
|
||||
rd: R,
|
||||
init: bool,
|
||||
err: Option<Error>,
|
||||
buf: Vec<u8>,
|
||||
offset: usize,
|
||||
current: Option<MetaCacheEntry>,
|
||||
}
|
||||
|
||||
impl<R: AsyncRead + Unpin> MetacacheReader<R> {
|
||||
pub fn new(rd: R) -> Self {
|
||||
Self {
|
||||
rd,
|
||||
init: false,
|
||||
err: None,
|
||||
buf: Vec::new(),
|
||||
offset: 0,
|
||||
current: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn read_more(&mut self, read_size: usize) -> Result<&[u8]> {
|
||||
let ext_size = read_size + self.offset;
|
||||
|
||||
let extra = ext_size - self.offset;
|
||||
if self.buf.capacity() >= ext_size {
|
||||
// Extend the buffer if we have enough space.
|
||||
self.buf.resize(ext_size, 0);
|
||||
} else {
|
||||
self.buf.extend(vec![0u8; extra]);
|
||||
}
|
||||
|
||||
let pref = self.offset;
|
||||
|
||||
self.rd.read_exact(&mut self.buf[pref..ext_size]).await?;
|
||||
|
||||
self.offset += read_size;
|
||||
|
||||
let data = &self.buf[pref..ext_size];
|
||||
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
fn reset(&mut self) {
|
||||
self.buf.clear();
|
||||
self.offset = 0;
|
||||
}
|
||||
|
||||
async fn check_init(&mut self) -> Result<()> {
|
||||
if !self.init {
|
||||
let ver = match rmp::decode::read_u8(&mut self.read_more(2).await?) {
|
||||
Ok(res) => res,
|
||||
Err(err) => {
|
||||
self.err = Some(Error::other(format!("{:?}", err)));
|
||||
0
|
||||
}
|
||||
};
|
||||
match ver {
|
||||
1 | 2 => (),
|
||||
_ => {
|
||||
self.err = Some(Error::other("invalid version"));
|
||||
}
|
||||
}
|
||||
|
||||
self.init = true;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn read_str_len(&mut self) -> Result<u32> {
|
||||
let mark = match rmp::decode::read_marker(&mut self.read_more(1).await?) {
|
||||
Ok(res) => res,
|
||||
Err(err) => {
|
||||
let err: Error = err.into();
|
||||
self.err = Some(err.clone());
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
|
||||
match mark {
|
||||
Marker::FixStr(size) => Ok(u32::from(size)),
|
||||
Marker::Str8 => Ok(u32::from(self.read_u8().await?)),
|
||||
Marker::Str16 => Ok(u32::from(self.read_u16().await?)),
|
||||
Marker::Str32 => Ok(self.read_u32().await?),
|
||||
_marker => Err(Error::other("str marker err")),
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_bin_len(&mut self) -> Result<u32> {
|
||||
let mark = match rmp::decode::read_marker(&mut self.read_more(1).await?) {
|
||||
Ok(res) => res,
|
||||
Err(err) => {
|
||||
let err: Error = err.into();
|
||||
self.err = Some(err.clone());
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
|
||||
match mark {
|
||||
Marker::Bin8 => Ok(u32::from(self.read_u8().await?)),
|
||||
Marker::Bin16 => Ok(u32::from(self.read_u16().await?)),
|
||||
Marker::Bin32 => Ok(self.read_u32().await?),
|
||||
_ => Err(Error::other("bin marker err")),
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_u8(&mut self) -> Result<u8> {
|
||||
let buf = self.read_more(1).await?;
|
||||
Ok(u8::from_be_bytes(buf.try_into().expect("Slice with incorrect length")))
|
||||
}
|
||||
|
||||
async fn read_u16(&mut self) -> Result<u16> {
|
||||
let buf = self.read_more(2).await?;
|
||||
Ok(u16::from_be_bytes(buf.try_into().expect("Slice with incorrect length")))
|
||||
}
|
||||
|
||||
async fn read_u32(&mut self) -> Result<u32> {
|
||||
let buf = self.read_more(4).await?;
|
||||
Ok(u32::from_be_bytes(buf.try_into().expect("Slice with incorrect length")))
|
||||
}
|
||||
|
||||
pub async fn skip(&mut self, size: usize) -> Result<()> {
|
||||
self.check_init().await?;
|
||||
|
||||
if let Some(err) = &self.err {
|
||||
return Err(err.clone());
|
||||
}
|
||||
|
||||
let mut n = size;
|
||||
|
||||
if self.current.is_some() {
|
||||
n -= 1;
|
||||
self.current = None;
|
||||
}
|
||||
|
||||
while n > 0 {
|
||||
match rmp::decode::read_bool(&mut self.read_more(1).await?) {
|
||||
Ok(res) => {
|
||||
if !res {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
let err: Error = err.into();
|
||||
self.err = Some(err.clone());
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
|
||||
let l = self.read_str_len().await?;
|
||||
let _ = self.read_more(l as usize).await?;
|
||||
let l = self.read_bin_len().await?;
|
||||
let _ = self.read_more(l as usize).await?;
|
||||
|
||||
n -= 1;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn peek(&mut self) -> Result<Option<MetaCacheEntry>> {
|
||||
self.check_init().await?;
|
||||
|
||||
if let Some(err) = &self.err {
|
||||
return Err(err.clone());
|
||||
}
|
||||
|
||||
match rmp::decode::read_bool(&mut self.read_more(1).await?) {
|
||||
Ok(res) => {
|
||||
if !res {
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
let err: Error = err.into();
|
||||
self.err = Some(err.clone());
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
|
||||
let l = self.read_str_len().await?;
|
||||
|
||||
let buf = self.read_more(l as usize).await?;
|
||||
let name_buf = buf.to_vec();
|
||||
let name = match from_utf8(&name_buf) {
|
||||
Ok(decoded) => decoded.to_owned(),
|
||||
Err(err) => {
|
||||
self.err = Some(Error::other(err.to_string()));
|
||||
return Err(Error::other(err.to_string()));
|
||||
}
|
||||
};
|
||||
|
||||
let l = self.read_bin_len().await?;
|
||||
|
||||
let buf = self.read_more(l as usize).await?;
|
||||
|
||||
let metadata = buf.to_vec();
|
||||
|
||||
self.reset();
|
||||
|
||||
let entry = Some(MetaCacheEntry {
|
||||
name,
|
||||
metadata,
|
||||
cached: None,
|
||||
reusable: false,
|
||||
});
|
||||
self.current = entry.clone();
|
||||
|
||||
Ok(entry)
|
||||
}
|
||||
|
||||
pub async fn read_all(&mut self) -> Result<Vec<MetaCacheEntry>> {
|
||||
let mut ret = Vec::new();
|
||||
|
||||
loop {
|
||||
if let Some(entry) = self.peek().await? {
|
||||
ret.push(entry);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
}
|
||||
|
||||
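/// Async factory invoked by `Cache` whenever the cached value needs to be refreshed.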
pub type UpdateFn<T> = Box<dyn Fn() -> Pin<Box<dyn Future<Output = Result<T>> + Send>> + Send + Sync + 'static>;
|
||||
|
||||
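/// Options controlling how `Cache::get` behaves when the cached value is stale.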
#[derive(Clone, Debug, Default)]
|
||||
pub struct Opts {
|
||||
pub return_last_good: bool,
|
||||
pub no_wait: bool,
|
||||
}
|
||||
|
||||
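/// Single-value TTL cache: `get` returns the stored value while it is fresh and otherwise
/// refreshes it through `update_fn`, optionally in a background task when `no_wait` is set.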
pub struct Cache<T: Clone + Debug + Send> {
|
||||
update_fn: UpdateFn<T>,
|
||||
ttl: Duration,
|
||||
opts: Opts,
|
||||
val: AtomicPtr<T>,
|
||||
last_update_ms: AtomicU64,
|
||||
updating: Arc<Mutex<bool>>,
|
||||
}
|
||||
|
||||
impl<T: Clone + Debug + Send + 'static> Cache<T> {
|
||||
pub fn new(update_fn: UpdateFn<T>, ttl: Duration, opts: Opts) -> Self {
|
||||
let val = AtomicPtr::new(ptr::null_mut());
|
||||
Self {
|
||||
update_fn,
|
||||
ttl,
|
||||
opts,
|
||||
val,
|
||||
last_update_ms: AtomicU64::new(0),
|
||||
updating: Arc::new(Mutex::new(false)),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(unsafe_code)]
|
||||
pub async fn get(self: Arc<Self>) -> Result<T> {
|
||||
let v_ptr = self.val.load(AtomicOrdering::SeqCst);
|
||||
let v = if v_ptr.is_null() {
|
||||
None
|
||||
} else {
|
||||
Some(unsafe { (*v_ptr).clone() })
|
||||
};
|
||||
|
||||
let now = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("Time went backwards")
|
||||
.as_secs();
|
||||
if now - self.last_update_ms.load(AtomicOrdering::SeqCst) < self.ttl.as_secs() {
|
||||
if let Some(v) = v {
|
||||
return Ok(v);
|
||||
}
|
||||
}
|
||||
|
||||
if self.opts.no_wait && v.is_some() && now - self.last_update_ms.load(AtomicOrdering::SeqCst) < self.ttl.as_secs() * 2 {
|
||||
if self.updating.try_lock().is_ok() {
|
||||
let this = Arc::clone(&self);
|
||||
spawn(async move {
|
||||
let _ = this.update().await;
|
||||
});
|
||||
}
|
||||
|
||||
return Ok(v.unwrap());
|
||||
}
|
||||
|
||||
let _ = self.updating.lock().await;
|
||||
|
||||
if let Ok(duration) =
|
||||
SystemTime::now().duration_since(UNIX_EPOCH + Duration::from_secs(self.last_update_ms.load(AtomicOrdering::SeqCst)))
|
||||
{
|
||||
if duration < self.ttl {
|
||||
return Ok(v.unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
match self.update().await {
|
||||
Ok(_) => {
|
||||
let v_ptr = self.val.load(AtomicOrdering::SeqCst);
|
||||
let v = if v_ptr.is_null() {
|
||||
None
|
||||
} else {
|
||||
Some(unsafe { (*v_ptr).clone() })
|
||||
};
|
||||
Ok(v.unwrap())
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
async fn update(&self) -> Result<()> {
|
||||
match (self.update_fn)().await {
|
||||
Ok(val) => {
|
||||
self.val.store(Box::into_raw(Box::new(val)), AtomicOrdering::SeqCst);
|
||||
let now = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("Time went backwards")
|
||||
.as_secs();
|
||||
self.last_update_ms.store(now, AtomicOrdering::SeqCst);
|
||||
Ok(())
|
||||
}
|
||||
Err(err) => {
|
||||
let v_ptr = self.val.load(AtomicOrdering::SeqCst);
|
||||
if self.opts.return_last_good && !v_ptr.is_null() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::io::Cursor;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_writer() {
|
||||
let mut f = Cursor::new(Vec::new());
|
||||
let mut w = MetacacheWriter::new(&mut f);
|
||||
|
||||
let mut objs = Vec::new();
|
||||
for i in 0..10 {
|
||||
let info = MetaCacheEntry {
|
||||
name: format!("item{}", i),
|
||||
metadata: vec![0u8, 10],
|
||||
cached: None,
|
||||
reusable: false,
|
||||
};
|
||||
objs.push(info);
|
||||
}
|
||||
|
||||
w.write(&objs).await.unwrap();
|
||||
w.close().await.unwrap();
|
||||
|
||||
let data = f.into_inner();
|
||||
let nf = Cursor::new(data);
|
||||
|
||||
let mut r = MetacacheReader::new(nf);
|
||||
let nobjs = r.read_all().await.unwrap();
|
||||
|
||||
assert_eq!(objs, nobjs);
|
||||
}
|
||||
}
|
||||
292
crates/filemeta/src/test_data.rs
Normal file
@@ -0,0 +1,292 @@
|
||||
use crate::error::Result;
|
||||
use crate::filemeta::*;
|
||||
use std::collections::HashMap;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Create realistic xl.meta file data for testing
|
||||
pub fn create_real_xlmeta() -> Result<Vec<u8>> {
|
||||
let mut fm = FileMeta::new();
|
||||
|
||||
// Create a realistic object version
|
||||
let version_id = Uuid::parse_str("01234567-89ab-cdef-0123-456789abcdef")?;
|
||||
let data_dir = Uuid::parse_str("fedcba98-7654-3210-fedc-ba9876543210")?;
|
||||
|
||||
let mut metadata = HashMap::new();
|
||||
metadata.insert("Content-Type".to_string(), "text/plain".to_string());
|
||||
metadata.insert("X-Amz-Meta-Author".to_string(), "test-user".to_string());
|
||||
metadata.insert("X-Amz-Meta-Created".to_string(), "2024-01-15T10:30:00Z".to_string());
|
||||
|
||||
let object_version = MetaObject {
|
||||
version_id: Some(version_id),
|
||||
data_dir: Some(data_dir),
|
||||
erasure_algorithm: crate::fileinfo::ErasureAlgo::ReedSolomon,
|
||||
erasure_m: 4,
|
||||
erasure_n: 2,
|
||||
erasure_block_size: 1024 * 1024, // 1MB
|
||||
erasure_index: 1,
|
||||
erasure_dist: vec![0, 1, 2, 3, 4, 5],
|
||||
bitrot_checksum_algo: ChecksumAlgo::HighwayHash,
|
||||
part_numbers: vec![1],
|
||||
part_etags: vec!["d41d8cd98f00b204e9800998ecf8427e".to_string()],
|
||||
part_sizes: vec![1024],
|
||||
part_actual_sizes: vec![1024],
|
||||
part_indices: Vec::new(),
|
||||
size: 1024,
|
||||
mod_time: Some(OffsetDateTime::from_unix_timestamp(1705312200)?), // 2024-01-15 10:30:00 UTC
|
||||
meta_sys: HashMap::new(),
|
||||
meta_user: metadata,
|
||||
};
|
||||
|
||||
let file_version = FileMetaVersion {
|
||||
version_type: VersionType::Object,
|
||||
object: Some(object_version),
|
||||
delete_marker: None,
|
||||
write_version: 1,
|
||||
};
|
||||
|
||||
let shallow_version = FileMetaShallowVersion::try_from(file_version)?;
|
||||
fm.versions.push(shallow_version);
|
||||
|
||||
// Add a delete-marker version
|
||||
let delete_version_id = Uuid::parse_str("11111111-2222-3333-4444-555555555555")?;
|
||||
let delete_marker = MetaDeleteMarker {
|
||||
version_id: Some(delete_version_id),
|
||||
mod_time: Some(OffsetDateTime::from_unix_timestamp(1705312260)?), // one minute later
|
||||
meta_sys: None,
|
||||
};
|
||||
|
||||
let delete_file_version = FileMetaVersion {
|
||||
version_type: VersionType::Delete,
|
||||
object: None,
|
||||
delete_marker: Some(delete_marker),
|
||||
write_version: 2,
|
||||
};
|
||||
|
||||
let delete_shallow_version = FileMetaShallowVersion::try_from(delete_file_version)?;
|
||||
fm.versions.push(delete_shallow_version);
|
||||
|
||||
// Add a Legacy version for testing
|
||||
let legacy_version_id = Uuid::parse_str("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee")?;
|
||||
let legacy_version = FileMetaVersion {
|
||||
version_type: VersionType::Legacy,
|
||||
object: None,
|
||||
delete_marker: None,
|
||||
write_version: 3,
|
||||
};
|
||||
|
||||
let mut legacy_shallow = FileMetaShallowVersion::try_from(legacy_version)?;
|
||||
legacy_shallow.header.version_id = Some(legacy_version_id);
|
||||
legacy_shallow.header.mod_time = Some(OffsetDateTime::from_unix_timestamp(1705312140)?); // an earlier time
|
||||
fm.versions.push(legacy_shallow);
|
||||
|
||||
// Sort by modification time (newest first)
|
||||
fm.versions.sort_by(|a, b| b.header.mod_time.cmp(&a.header.mod_time));
|
||||
|
||||
fm.marshal_msg()
|
||||
}
|
||||
|
||||
/// Create a complex xl.meta file containing multiple versions
|
||||
pub fn create_complex_xlmeta() -> Result<Vec<u8>> {
|
||||
let mut fm = FileMeta::new();
|
||||
|
||||
// Create 10 object versions
|
||||
for i in 0..10 {
|
||||
let version_id = Uuid::new_v4();
|
||||
let data_dir = if i % 3 == 0 { Some(Uuid::new_v4()) } else { None };
|
||||
|
||||
let mut metadata = HashMap::new();
|
||||
metadata.insert("Content-Type".to_string(), "application/octet-stream".to_string());
|
||||
metadata.insert("X-Amz-Meta-Version".to_string(), i.to_string());
|
||||
metadata.insert("X-Amz-Meta-Test".to_string(), format!("test-value-{}", i));
|
||||
|
||||
let object_version = MetaObject {
|
||||
version_id: Some(version_id),
|
||||
data_dir,
|
||||
erasure_algorithm: crate::fileinfo::ErasureAlgo::ReedSolomon,
|
||||
erasure_m: 4,
|
||||
erasure_n: 2,
|
||||
erasure_block_size: 1024 * 1024,
|
||||
erasure_index: (i % 6) as usize,
|
||||
erasure_dist: vec![0, 1, 2, 3, 4, 5],
|
||||
bitrot_checksum_algo: ChecksumAlgo::HighwayHash,
|
||||
part_numbers: vec![1],
|
||||
part_etags: vec![format!("etag-{:08x}", i)],
|
||||
part_sizes: vec![1024 * (i + 1) as usize],
|
||||
part_actual_sizes: vec![1024 * (i + 1) as usize],
|
||||
part_indices: Vec::new(),
|
||||
size: 1024 * (i + 1) as usize,
|
||||
mod_time: Some(OffsetDateTime::from_unix_timestamp(1705312200 + i * 60)?),
|
||||
meta_sys: HashMap::new(),
|
||||
meta_user: metadata,
|
||||
};
|
||||
|
||||
let file_version = FileMetaVersion {
|
||||
version_type: VersionType::Object,
|
||||
object: Some(object_version),
|
||||
delete_marker: None,
|
||||
write_version: (i + 1) as u64,
|
||||
};
|
||||
|
||||
let shallow_version = FileMetaShallowVersion::try_from(file_version)?;
|
||||
fm.versions.push(shallow_version);
|
||||
|
||||
// Add a delete marker after every third version
|
||||
if i % 3 == 2 {
|
||||
let delete_version_id = Uuid::new_v4();
|
||||
let delete_marker = MetaDeleteMarker {
|
||||
version_id: Some(delete_version_id),
|
||||
mod_time: Some(OffsetDateTime::from_unix_timestamp(1705312200 + i * 60 + 30)?),
|
||||
meta_sys: None,
|
||||
};
|
||||
|
||||
let delete_file_version = FileMetaVersion {
|
||||
version_type: VersionType::Delete,
|
||||
object: None,
|
||||
delete_marker: Some(delete_marker),
|
||||
write_version: (i + 100) as u64,
|
||||
};
|
||||
|
||||
let delete_shallow_version = FileMetaShallowVersion::try_from(delete_file_version)?;
|
||||
fm.versions.push(delete_shallow_version);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by modification time (newest first)
|
||||
fm.versions.sort_by(|a, b| b.header.mod_time.cmp(&a.header.mod_time));
|
||||
|
||||
fm.marshal_msg()
|
||||
}
|
||||
|
||||
/// Create a corrupted xl.meta file for error-handling tests
|
||||
pub fn create_corrupted_xlmeta() -> Vec<u8> {
|
||||
let mut data = vec![
|
||||
// Valid file header
b'X', b'L', b'2', b' ', // "XL2 " magic
1, 0, 3, 0, // version
0xc6, 0x00, 0x00, 0x00, 0x10, // valid bin32 length marker, but the data length does not match
|
||||
];
|
||||
|
||||
// Append too little data (less than the declared length)
data.extend_from_slice(&[0x42; 8]); // only 8 bytes, although 16 were declared
|
||||
|
||||
data
|
||||
}
|
||||
|
||||
/// Create an empty xl.meta file
|
||||
pub fn create_empty_xlmeta() -> Result<Vec<u8>> {
|
||||
let fm = FileMeta::new();
|
||||
fm.marshal_msg()
|
||||
}
|
||||
|
||||
/// Helper to verify parsed metadata
|
||||
pub fn verify_parsed_metadata(fm: &FileMeta, expected_versions: usize) -> Result<()> {
|
||||
assert_eq!(fm.versions.len(), expected_versions, "version count mismatch");
assert_eq!(fm.meta_ver, crate::filemeta::XL_META_VERSION, "metadata version mismatch");

// Verify that versions are sorted by modification time
|
||||
for i in 1..fm.versions.len() {
|
||||
let prev_time = fm.versions[i - 1].header.mod_time;
|
||||
let curr_time = fm.versions[i].header.mod_time;
|
||||
|
||||
if let (Some(prev), Some(curr)) = (prev_time, curr_time) {
|
||||
assert!(prev >= curr, "versions are not sorted by modification time");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create an xl.meta file containing inline data
|
||||
pub fn create_xlmeta_with_inline_data() -> Result<Vec<u8>> {
|
||||
let mut fm = FileMeta::new();
|
||||
|
||||
// Add inline data
|
||||
let inline_data = b"This is inline data for testing purposes";
|
||||
let version_id = Uuid::new_v4();
|
||||
fm.data.replace(&version_id.to_string(), inline_data.to_vec())?;
|
||||
|
||||
let object_version = MetaObject {
|
||||
version_id: Some(version_id),
|
||||
data_dir: None,
|
||||
erasure_algorithm: crate::fileinfo::ErasureAlgo::ReedSolomon,
|
||||
erasure_m: 1,
|
||||
erasure_n: 1,
|
||||
erasure_block_size: 64 * 1024,
|
||||
erasure_index: 0,
|
||||
erasure_dist: vec![0, 1],
|
||||
bitrot_checksum_algo: ChecksumAlgo::HighwayHash,
|
||||
part_numbers: vec![1],
|
||||
part_etags: Vec::new(),
|
||||
part_sizes: vec![inline_data.len()],
|
||||
part_actual_sizes: Vec::new(),
|
||||
part_indices: Vec::new(),
|
||||
size: inline_data.len(),
|
||||
mod_time: Some(OffsetDateTime::now_utc()),
|
||||
meta_sys: HashMap::new(),
|
||||
meta_user: HashMap::new(),
|
||||
};
|
||||
|
||||
let file_version = FileMetaVersion {
|
||||
version_type: VersionType::Object,
|
||||
object: Some(object_version),
|
||||
delete_marker: None,
|
||||
write_version: 1,
|
||||
};
|
||||
|
||||
let shallow_version = FileMetaShallowVersion::try_from(file_version)?;
|
||||
fm.versions.push(shallow_version);
|
||||
|
||||
fm.marshal_msg()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_create_real_xlmeta() {
|
||||
let data = create_real_xlmeta().expect("failed to create test data");
assert!(!data.is_empty(), "generated data should not be empty");

// Verify the file header
assert_eq!(&data[0..4], b"XL2 ", "incorrect file header");

// Try to parse it
let fm = FileMeta::load(&data).expect("parse failed");
verify_parsed_metadata(&fm, 3).expect("verification failed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_create_complex_xlmeta() {
|
||||
let data = create_complex_xlmeta().expect("failed to create complex test data");
assert!(!data.is_empty(), "generated data should not be empty");

let fm = FileMeta::load(&data).expect("parse failed");
assert!(fm.versions.len() >= 10, "should have at least 10 versions");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_create_xlmeta_with_inline_data() {
|
||||
let data = create_xlmeta_with_inline_data().expect("failed to create inline-data test data");
assert!(!data.is_empty(), "generated data should not be empty");

let fm = FileMeta::load(&data).expect("parse failed");
assert_eq!(fm.versions.len(), 1, "should have exactly 1 version");
assert!(!fm.data.as_slice().is_empty(), "should contain inline data");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_corrupted_xlmeta_handling() {
|
||||
let data = create_corrupted_xlmeta();
|
||||
let result = FileMeta::load(&data);
|
||||
assert!(result.is_err(), "corrupted data should fail to parse");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_xlmeta() {
|
||||
let data = create_empty_xlmeta().expect("failed to create empty test data");
let fm = FileMeta::load(&data).expect("failed to parse empty data");
assert_eq!(fm.versions.len(), 0, "an empty file should have no versions");
|
||||
}
|
||||
}
|
||||
36
crates/rio/Cargo.toml
Normal file
@@ -0,0 +1,36 @@
|
||||
[package]
|
||||
name = "rustfs-rio"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
repository.workspace = true
|
||||
rust-version.workspace = true
|
||||
version.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
rand = { workspace = true }
|
||||
md-5 = { workspace = true }
|
||||
http.workspace = true
|
||||
flate2 = "1.1.1"
|
||||
aes-gcm = "0.10.3"
|
||||
crc32fast = "1.4.2"
|
||||
pin-project-lite.workspace = true
|
||||
async-trait.workspace = true
|
||||
base64-simd = "0.8.0"
|
||||
hex-simd = "0.8.0"
|
||||
zstd = "0.13.3"
|
||||
lz4 = "1.28.1"
|
||||
brotli = "8.0.1"
|
||||
snap = "1.1.1"
|
||||
|
||||
bytes.workspace = true
|
||||
reqwest.workspace = true
|
||||
tokio-util.workspace = true
|
||||
futures.workspace = true
|
||||
rustfs-utils = {workspace = true, features= ["io","hash"]}
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = ["async", "async_tokio", "tokio"] }
|
||||
325
crates/rio/src/bitrot.rs
Normal file
@@ -0,0 +1,325 @@
|
||||
use crate::{Reader, Writer};
|
||||
use pin_project_lite::pin_project;
|
||||
use rustfs_utils::{read_full, write_all, HashAlgorithm};
|
||||
use tokio::io::{AsyncRead, AsyncReadExt};
|
||||
|
||||
pin_project! {
|
||||
/// BitrotReader reads (hash+data) blocks from an async reader and verifies hash integrity.
|
||||
pub struct BitrotReader {
|
||||
#[pin]
|
||||
inner: Box<dyn Reader>,
|
||||
hash_algo: HashAlgorithm,
|
||||
shard_size: usize,
|
||||
buf: Vec<u8>,
|
||||
hash_buf: Vec<u8>,
|
||||
hash_read: usize,
|
||||
data_buf: Vec<u8>,
|
||||
data_read: usize,
|
||||
hash_checked: bool,
|
||||
}
|
||||
}
|
||||
|
||||
impl BitrotReader {
|
||||
/// Get a reference to the underlying reader.
|
||||
pub fn get_ref(&self) -> &dyn Reader {
|
||||
&*self.inner
|
||||
}
|
||||
|
||||
/// Create a new BitrotReader.
|
||||
pub fn new(inner: Box<dyn Reader>, shard_size: usize, algo: HashAlgorithm) -> Self {
|
||||
let hash_size = algo.size();
|
||||
Self {
|
||||
inner,
|
||||
hash_algo: algo,
|
||||
shard_size,
|
||||
buf: Vec::new(),
|
||||
hash_buf: vec![0u8; hash_size],
|
||||
hash_read: 0,
|
||||
data_buf: Vec::new(),
|
||||
data_read: 0,
|
||||
hash_checked: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Read a single (hash+data) block, verify hash, and return the number of bytes read into `out`.
|
||||
/// Returns an error if hash verification fails or data exceeds shard_size.
|
||||
pub async fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
|
||||
if out.len() > self.shard_size {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!("data size {} exceeds shard size {}", out.len(), self.shard_size),
|
||||
));
|
||||
}
|
||||
|
||||
let hash_size = self.hash_algo.size();
|
||||
// Read hash
|
||||
let mut hash_buf = vec![0u8; hash_size];
|
||||
if hash_size > 0 {
|
||||
self.inner.read_exact(&mut hash_buf).await?;
|
||||
}
|
||||
|
||||
let data_len = read_full(&mut self.inner, out).await?;
|
||||
|
||||
// // Read data
|
||||
// let mut data_len = 0;
|
||||
// while data_len < out.len() {
|
||||
// let n = self.inner.read(&mut out[data_len..]).await?;
|
||||
// if n == 0 {
|
||||
// break;
|
||||
// }
|
||||
// data_len += n;
|
||||
// // Only read up to one shard_size block
|
||||
// if data_len >= self.shard_size {
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
|
||||
if hash_size > 0 {
|
||||
let actual_hash = self.hash_algo.hash_encode(&out[..data_len]);
|
||||
if actual_hash != hash_buf {
|
||||
return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "bitrot hash mismatch"));
|
||||
}
|
||||
}
|
||||
Ok(data_len)
|
||||
}
|
||||
}
|
||||
|
||||
pin_project! {
|
||||
/// BitrotWriter writes (hash+data) blocks to an async writer.
|
||||
pub struct BitrotWriter {
|
||||
#[pin]
|
||||
inner: Writer,
|
||||
hash_algo: HashAlgorithm,
|
||||
shard_size: usize,
|
||||
buf: Vec<u8>,
|
||||
finished: bool,
|
||||
}
|
||||
}
|
||||
|
||||
impl BitrotWriter {
|
||||
/// Create a new BitrotWriter.
|
||||
pub fn new(inner: Writer, shard_size: usize, algo: HashAlgorithm) -> Self {
|
||||
let hash_algo = algo;
|
||||
Self {
|
||||
inner,
|
||||
hash_algo,
|
||||
shard_size,
|
||||
buf: Vec::new(),
|
||||
finished: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> Writer {
|
||||
self.inner
|
||||
}
|
||||
|
||||
/// Write a (hash+data) block. Returns the number of data bytes written.
|
||||
/// Returns an error if called after a short write or if data exceeds shard_size.
|
||||
pub async fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||
if buf.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
if self.finished {
|
||||
return Err(std::io::Error::new(std::io::ErrorKind::InvalidInput, "bitrot writer already finished"));
|
||||
}
|
||||
|
||||
if buf.len() > self.shard_size {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!("data size {} exceeds shard size {}", buf.len(), self.shard_size),
|
||||
));
|
||||
}
|
||||
|
||||
if buf.len() < self.shard_size {
|
||||
self.finished = true;
|
||||
}
|
||||
|
||||
let hash_algo = &self.hash_algo;
|
||||
|
||||
if hash_algo.size() > 0 {
|
||||
let hash = hash_algo.hash_encode(buf);
|
||||
self.buf.extend_from_slice(&hash);
|
||||
}
|
||||
|
||||
self.buf.extend_from_slice(buf);
|
||||
|
||||
// Write hash+data in one call
|
||||
let mut n = write_all(&mut self.inner, &self.buf).await?;
|
||||
|
||||
if n < hash_algo.size() {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::WriteZero,
|
||||
"short write: not enough bytes written",
|
||||
));
|
||||
}
|
||||
|
||||
n -= hash_algo.size();
|
||||
|
||||
self.buf.clear();
|
||||
|
||||
Ok(n)
|
||||
}
|
||||
}
|
||||
|
||||
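/// On-disk size of a bitrot-protected shard file when `HighwayHash256S` streaming hashes are
/// interleaved: one hash per `shard_size` block plus the data itself (e.g. a 10 MiB part with
/// 1 MiB shards and 32-byte hashes adds 10 * 32 bytes of overhead); for any other algorithm the
/// data size is returned unchanged.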
pub fn bitrot_shard_file_size(size: usize, shard_size: usize, algo: HashAlgorithm) -> usize {
|
||||
if algo != HashAlgorithm::HighwayHash256S {
|
||||
return size;
|
||||
}
|
||||
size.div_ceil(shard_size) * algo.size() + size
|
||||
}
|
||||
|
||||
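/// Verify a bitrot-protected stream by re-reading each (hash, data) block and re-hashing the data.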
pub async fn bitrot_verify<R: AsyncRead + Unpin + Send>(
|
||||
mut r: R,
|
||||
want_size: usize,
|
||||
part_size: usize,
|
||||
algo: HashAlgorithm,
|
||||
_want: Vec<u8>,
|
||||
mut shard_size: usize,
|
||||
) -> std::io::Result<()> {
|
||||
let mut hash_buf = vec![0; algo.size()];
|
||||
let mut left = want_size;
|
||||
|
||||
if left != bitrot_shard_file_size(part_size, shard_size, algo.clone()) {
|
||||
return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "bitrot shard file size mismatch"));
|
||||
}
|
||||
|
||||
while left > 0 {
|
||||
let n = r.read_exact(&mut hash_buf).await?;
|
||||
left -= n;
|
||||
|
||||
if left < shard_size {
|
||||
shard_size = left;
|
||||
}
|
||||
|
||||
let mut buf = vec![0; shard_size];
|
||||
let read = r.read_exact(&mut buf).await?;
|
||||
|
||||
let actual_hash = algo.hash_encode(&buf);
|
||||
if actual_hash != hash_buf[0..n] {
|
||||
return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "bitrot hash mismatch"));
|
||||
}
|
||||
|
||||
left -= read;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use crate::{BitrotReader, BitrotWriter, Writer};
|
||||
use rustfs_utils::HashAlgorithm;
|
||||
use std::io::Cursor;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_bitrot_read_write_ok() {
|
||||
let data = b"hello world! this is a test shard.";
|
||||
let data_size = data.len();
|
||||
let shard_size = 8;
|
||||
|
||||
let buf = Vec::new();
|
||||
let writer = Cursor::new(buf);
|
||||
let mut bitrot_writer = BitrotWriter::new(Writer::from_cursor(writer), shard_size, HashAlgorithm::HighwayHash256);
|
||||
|
||||
let mut n = 0;
|
||||
for chunk in data.chunks(shard_size) {
|
||||
n += bitrot_writer.write(chunk).await.unwrap();
|
||||
}
|
||||
assert_eq!(n, data.len());
|
||||
|
||||
// Read back
|
||||
let reader = Cursor::new(bitrot_writer.into_inner().into_cursor_inner().unwrap());
|
||||
let reader = Box::new(reader);
|
||||
let mut bitrot_reader = BitrotReader::new(reader, shard_size, HashAlgorithm::HighwayHash256);
|
||||
let mut out = Vec::new();
|
||||
let mut n = 0;
|
||||
while n < data_size {
|
||||
let mut buf = vec![0u8; shard_size];
|
||||
let m = bitrot_reader.read(&mut buf).await.unwrap();
|
||||
assert_eq!(&buf[..m], &data[n..n + m]);
|
||||
|
||||
out.extend_from_slice(&buf[..m]);
|
||||
n += m;
|
||||
}
|
||||
|
||||
assert_eq!(n, data_size);
|
||||
assert_eq!(data, &out[..]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_bitrot_read_hash_mismatch() {
|
||||
let data = b"test data for bitrot";
|
||||
let data_size = data.len();
|
||||
let shard_size = 8;
|
||||
let buf = Vec::new();
|
||||
let writer = Cursor::new(buf);
|
||||
let mut bitrot_writer = BitrotWriter::new(Writer::from_cursor(writer), shard_size, HashAlgorithm::HighwayHash256);
|
||||
for chunk in data.chunks(shard_size) {
|
||||
let _ = bitrot_writer.write(chunk).await.unwrap();
|
||||
}
|
||||
let mut written = bitrot_writer.into_inner().into_cursor_inner().unwrap();
|
||||
// change the last byte to make hash mismatch
|
||||
let pos = written.len() - 1;
|
||||
written[pos] ^= 0xFF;
|
||||
let reader = Cursor::new(written);
|
||||
let reader = Box::new(reader);
|
||||
let mut bitrot_reader = BitrotReader::new(reader, shard_size, HashAlgorithm::HighwayHash256);
|
||||
|
||||
let count = data_size.div_ceil(shard_size);
|
||||
|
||||
let mut idx = 0;
|
||||
let mut n = 0;
|
||||
while n < data_size {
|
||||
let mut buf = vec![0u8; shard_size];
|
||||
let res = bitrot_reader.read(&mut buf).await;
|
||||
|
||||
if idx == count - 1 {
|
||||
// The last block should return an error
|
||||
assert!(res.is_err());
|
||||
assert_eq!(res.unwrap_err().kind(), std::io::ErrorKind::InvalidData);
|
||||
break;
|
||||
}
|
||||
|
||||
let m = res.unwrap();
|
||||
|
||||
assert_eq!(&buf[..m], &data[n..n + m]);
|
||||
|
||||
n += m;
|
||||
idx += 1;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_bitrot_read_write_none_hash() {
|
||||
let data = b"bitrot none hash test data!";
|
||||
let data_size = data.len();
|
||||
let shard_size = 8;
|
||||
|
||||
let buf = Vec::new();
|
||||
let writer = Cursor::new(buf);
|
||||
let mut bitrot_writer = BitrotWriter::new(Writer::from_cursor(writer), shard_size, HashAlgorithm::None);
|
||||
|
||||
let mut n = 0;
|
||||
for chunk in data.chunks(shard_size) {
|
||||
n += bitrot_writer.write(chunk).await.unwrap();
|
||||
}
|
||||
assert_eq!(n, data.len());
|
||||
|
||||
let reader = Cursor::new(bitrot_writer.into_inner().into_cursor_inner().unwrap());
|
||||
let reader = Box::new(reader);
|
||||
let mut bitrot_reader = BitrotReader::new(reader, shard_size, HashAlgorithm::None);
|
||||
let mut out = Vec::new();
|
||||
let mut n = 0;
|
||||
while n < data_size {
|
||||
let mut buf = vec![0u8; shard_size];
|
||||
let m = bitrot_reader.read(&mut buf).await.unwrap();
|
||||
assert_eq!(&buf[..m], &data[n..n + m]);
|
||||
out.extend_from_slice(&buf[..m]);
|
||||
n += m;
|
||||
}
|
||||
assert_eq!(n, data_size);
|
||||
assert_eq!(data, &out[..]);
|
||||
}
|
||||
}
|
||||
270
crates/rio/src/compress.rs
Normal file
@@ -0,0 +1,270 @@
|
||||
use http::HeaderMap;
|
||||
use std::io::Write;
|
||||
use tokio::io;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
|
||||
pub enum CompressionAlgorithm {
|
||||
Gzip,
|
||||
#[default]
|
||||
Deflate,
|
||||
Zstd,
|
||||
Lz4,
|
||||
Brotli,
|
||||
Snappy,
|
||||
}
|
||||
|
||||
impl CompressionAlgorithm {
|
||||
pub fn as_str(&self) -> &str {
|
||||
match self {
|
||||
CompressionAlgorithm::Gzip => "gzip",
|
||||
CompressionAlgorithm::Deflate => "deflate",
|
||||
CompressionAlgorithm::Zstd => "zstd",
|
||||
CompressionAlgorithm::Lz4 => "lz4",
|
||||
CompressionAlgorithm::Brotli => "brotli",
|
||||
CompressionAlgorithm::Snappy => "snappy",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for CompressionAlgorithm {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.as_str())
|
||||
}
|
||||
}
|
||||
impl std::str::FromStr for CompressionAlgorithm {
|
||||
type Err = std::io::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s.to_lowercase().as_str() {
|
||||
"gzip" => Ok(CompressionAlgorithm::Gzip),
|
||||
"deflate" => Ok(CompressionAlgorithm::Deflate),
|
||||
"zstd" => Ok(CompressionAlgorithm::Zstd),
|
||||
"lz4" => Ok(CompressionAlgorithm::Lz4),
|
||||
"brotli" => Ok(CompressionAlgorithm::Brotli),
|
||||
"snappy" => Ok(CompressionAlgorithm::Snappy),
|
||||
_ => Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!("Unsupported compression algorithm: {}", s),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
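/// Compress a single in-memory block with the selected algorithm and return the compressed bytes.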
pub fn compress_block(input: &[u8], algorithm: CompressionAlgorithm) -> Vec<u8> {
|
||||
match algorithm {
|
||||
CompressionAlgorithm::Gzip => {
|
||||
let mut encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());
|
||||
let _ = encoder.write_all(input);
|
||||
let _ = encoder.flush();
|
||||
encoder.finish().unwrap_or_default()
|
||||
}
|
||||
CompressionAlgorithm::Deflate => {
|
||||
let mut encoder = flate2::write::DeflateEncoder::new(Vec::new(), flate2::Compression::default());
|
||||
let _ = encoder.write_all(input);
|
||||
let _ = encoder.flush();
|
||||
encoder.finish().unwrap_or_default()
|
||||
}
|
||||
CompressionAlgorithm::Zstd => {
|
||||
let mut encoder = zstd::Encoder::new(Vec::new(), 0).expect("zstd encoder");
|
||||
let _ = encoder.write_all(input);
|
||||
encoder.finish().unwrap_or_default()
|
||||
}
|
||||
CompressionAlgorithm::Lz4 => {
|
||||
let mut encoder = lz4::EncoderBuilder::new().build(Vec::new()).expect("lz4 encoder");
|
||||
let _ = encoder.write_all(input);
|
||||
let (out, result) = encoder.finish();
|
||||
result.expect("lz4 finish");
|
||||
out
|
||||
}
|
||||
CompressionAlgorithm::Brotli => {
|
||||
let mut out = Vec::new();
|
||||
brotli::CompressorWriter::new(&mut out, 4096, 5, 22)
|
||||
.write_all(input)
|
||||
.expect("brotli compress");
|
||||
out
|
||||
}
|
||||
CompressionAlgorithm::Snappy => {
|
||||
let mut encoder = snap::write::FrameEncoder::new(Vec::new());
|
||||
let _ = encoder.write_all(input);
|
||||
encoder.into_inner().unwrap_or_default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
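/// Decompress a block previously produced by `compress_block` with the same algorithm.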
pub fn decompress_block(compressed: &[u8], algorithm: CompressionAlgorithm) -> io::Result<Vec<u8>> {
|
||||
match algorithm {
|
||||
CompressionAlgorithm::Gzip => {
|
||||
let mut decoder = flate2::read::GzDecoder::new(std::io::Cursor::new(compressed));
|
||||
let mut out = Vec::new();
|
||||
std::io::Read::read_to_end(&mut decoder, &mut out)?;
|
||||
Ok(out)
|
||||
}
|
||||
CompressionAlgorithm::Deflate => {
|
||||
let mut decoder = flate2::read::DeflateDecoder::new(std::io::Cursor::new(compressed));
|
||||
let mut out = Vec::new();
|
||||
std::io::Read::read_to_end(&mut decoder, &mut out)?;
|
||||
Ok(out)
|
||||
}
|
||||
CompressionAlgorithm::Zstd => {
|
||||
let mut decoder = zstd::Decoder::new(std::io::Cursor::new(compressed))?;
|
||||
let mut out = Vec::new();
|
||||
std::io::Read::read_to_end(&mut decoder, &mut out)?;
|
||||
Ok(out)
|
||||
}
|
||||
CompressionAlgorithm::Lz4 => {
|
||||
let mut decoder = lz4::Decoder::new(std::io::Cursor::new(compressed)).expect("lz4 decoder");
|
||||
let mut out = Vec::new();
|
||||
std::io::Read::read_to_end(&mut decoder, &mut out)?;
|
||||
Ok(out)
|
||||
}
|
||||
CompressionAlgorithm::Brotli => {
|
||||
let mut out = Vec::new();
|
||||
let mut decoder = brotli::Decompressor::new(std::io::Cursor::new(compressed), 4096);
|
||||
std::io::Read::read_to_end(&mut decoder, &mut out)?;
|
||||
Ok(out)
|
||||
}
|
||||
CompressionAlgorithm::Snappy => {
|
||||
let mut decoder = snap::read::FrameDecoder::new(std::io::Cursor::new(compressed));
|
||||
let mut out = Vec::new();
|
||||
std::io::Read::read_to_end(&mut decoder, &mut out)?;
|
||||
Ok(out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
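/// Objects smaller than this many bytes are not considered worth compressing.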
pub const MIN_COMPRESSIBLE_SIZE: i64 = 4096;
|
||||
|
||||
pub fn is_compressible(_headers: &HeaderMap) -> bool {
|
||||
// TODO: Implement this function
|
||||
false
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::str::FromStr;
|
||||
|
||||
#[test]
|
||||
fn test_compress_decompress_gzip() {
|
||||
let data = b"hello gzip compress";
|
||||
let compressed = compress_block(data, CompressionAlgorithm::Gzip);
|
||||
let decompressed = decompress_block(&compressed, CompressionAlgorithm::Gzip).unwrap();
|
||||
assert_eq!(decompressed, data);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compress_decompress_deflate() {
|
||||
let data = b"hello deflate compress";
|
||||
let compressed = compress_block(data, CompressionAlgorithm::Deflate);
|
||||
let decompressed = decompress_block(&compressed, CompressionAlgorithm::Deflate).unwrap();
|
||||
assert_eq!(decompressed, data);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compress_decompress_zstd() {
|
||||
let data = b"hello zstd compress";
|
||||
let compressed = compress_block(data, CompressionAlgorithm::Zstd);
|
||||
let decompressed = decompress_block(&compressed, CompressionAlgorithm::Zstd).unwrap();
|
||||
assert_eq!(decompressed, data);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compress_decompress_lz4() {
|
||||
let data = b"hello lz4 compress";
|
||||
let compressed = compress_block(data, CompressionAlgorithm::Lz4);
|
||||
let decompressed = decompress_block(&compressed, CompressionAlgorithm::Lz4).unwrap();
|
||||
assert_eq!(decompressed, data);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compress_decompress_brotli() {
|
||||
let data = b"hello brotli compress";
|
||||
let compressed = compress_block(data, CompressionAlgorithm::Brotli);
|
||||
let decompressed = decompress_block(&compressed, CompressionAlgorithm::Brotli).unwrap();
|
||||
assert_eq!(decompressed, data);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compress_decompress_snappy() {
|
||||
let data = b"hello snappy compress";
|
||||
let compressed = compress_block(data, CompressionAlgorithm::Snappy);
|
||||
let decompressed = decompress_block(&compressed, CompressionAlgorithm::Snappy).unwrap();
|
||||
assert_eq!(decompressed, data);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_str() {
|
||||
assert_eq!(CompressionAlgorithm::from_str("gzip").unwrap(), CompressionAlgorithm::Gzip);
|
||||
assert_eq!(CompressionAlgorithm::from_str("deflate").unwrap(), CompressionAlgorithm::Deflate);
|
||||
assert_eq!(CompressionAlgorithm::from_str("zstd").unwrap(), CompressionAlgorithm::Zstd);
|
||||
assert_eq!(CompressionAlgorithm::from_str("lz4").unwrap(), CompressionAlgorithm::Lz4);
|
||||
assert_eq!(CompressionAlgorithm::from_str("brotli").unwrap(), CompressionAlgorithm::Brotli);
|
||||
assert_eq!(CompressionAlgorithm::from_str("snappy").unwrap(), CompressionAlgorithm::Snappy);
|
||||
assert!(CompressionAlgorithm::from_str("unknown").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compare_compression_algorithms() {
|
||||
use std::time::Instant;
|
||||
let data = vec![42u8; 1024 * 100]; // 100KB of repetitive data
|
||||
|
||||
// let mut data = vec![0u8; 1024 * 1024];
|
||||
// rand::thread_rng().fill(&mut data[..]);
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
let mut times = Vec::new();
|
||||
times.push(("original", start.elapsed(), data.len()));
|
||||
|
||||
let start = Instant::now();
|
||||
let gzip = compress_block(&data, CompressionAlgorithm::Gzip);
|
||||
let gzip_time = start.elapsed();
|
||||
times.push(("gzip", gzip_time, gzip.len()));
|
||||
|
||||
let start = Instant::now();
|
||||
let deflate = compress_block(&data, CompressionAlgorithm::Deflate);
|
||||
let deflate_time = start.elapsed();
|
||||
times.push(("deflate", deflate_time, deflate.len()));
|
||||
|
||||
let start = Instant::now();
|
||||
let zstd = compress_block(&data, CompressionAlgorithm::Zstd);
|
||||
let zstd_time = start.elapsed();
|
||||
times.push(("zstd", zstd_time, zstd.len()));
|
||||
|
||||
let start = Instant::now();
|
||||
let lz4 = compress_block(&data, CompressionAlgorithm::Lz4);
|
||||
let lz4_time = start.elapsed();
|
||||
times.push(("lz4", lz4_time, lz4.len()));
|
||||
|
||||
let start = Instant::now();
|
||||
let brotli = compress_block(&data, CompressionAlgorithm::Brotli);
|
||||
let brotli_time = start.elapsed();
|
||||
times.push(("brotli", brotli_time, brotli.len()));
|
||||
|
||||
let start = Instant::now();
|
||||
let snappy = compress_block(&data, CompressionAlgorithm::Snappy);
|
||||
let snappy_time = start.elapsed();
|
||||
times.push(("snappy", snappy_time, snappy.len()));
|
||||
|
||||
println!("Compression results:");
|
||||
for (name, dur, size) in &times {
|
||||
println!("{}: {} bytes, {:?}", name, size, dur);
|
||||
}
|
||||
// All should decompress to the original
|
||||
assert_eq!(decompress_block(&gzip, CompressionAlgorithm::Gzip).unwrap(), data);
|
||||
assert_eq!(decompress_block(&deflate, CompressionAlgorithm::Deflate).unwrap(), data);
|
||||
assert_eq!(decompress_block(&zstd, CompressionAlgorithm::Zstd).unwrap(), data);
|
||||
assert_eq!(decompress_block(&lz4, CompressionAlgorithm::Lz4).unwrap(), data);
|
||||
assert_eq!(decompress_block(&brotli, CompressionAlgorithm::Brotli).unwrap(), data);
|
||||
assert_eq!(decompress_block(&snappy, CompressionAlgorithm::Snappy).unwrap(), data);
|
||||
// All compressed results should not be empty
|
||||
assert!(
|
||||
!gzip.is_empty()
|
||||
&& !deflate.is_empty()
|
||||
&& !zstd.is_empty()
|
||||
&& !lz4.is_empty()
|
||||
&& !brotli.is_empty()
|
||||
&& !snappy.is_empty()
|
||||
);
|
||||
}
|
||||
}
|
||||
469
crates/rio/src/compress_reader.rs
Normal file
@@ -0,0 +1,469 @@
|
||||
use crate::compress::{compress_block, decompress_block, CompressionAlgorithm};
|
||||
use crate::{EtagResolvable, HashReaderDetector};
|
||||
use crate::{HashReaderMut, Reader};
|
||||
use pin_project_lite::pin_project;
|
||||
use rustfs_utils::{put_uvarint, put_uvarint_len, uvarint};
|
||||
use std::io::{self};
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::{AsyncRead, ReadBuf};
|
||||
|
||||
pin_project! {
|
||||
#[derive(Debug)]
|
||||
/// A reader wrapper that compresses data on the fly, framing each block with the configured algorithm.
|
||||
pub struct CompressReader<R> {
|
||||
#[pin]
|
||||
pub inner: R,
|
||||
buffer: Vec<u8>,
|
||||
pos: usize,
|
||||
done: bool,
|
||||
block_size: usize,
|
||||
compression_algorithm: CompressionAlgorithm,
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> CompressReader<R>
|
||||
where
|
||||
R: Reader,
|
||||
{
|
||||
pub fn new(inner: R, compression_algorithm: CompressionAlgorithm) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
buffer: Vec::new(),
|
||||
pos: 0,
|
||||
done: false,
|
||||
compression_algorithm,
|
||||
block_size: 1 << 20, // Default 1MB
|
||||
}
|
||||
}
|
||||
|
||||
/// Optional: allow users to customize block_size
|
||||
pub fn with_block_size(inner: R, block_size: usize, compression_algorithm: CompressionAlgorithm) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
buffer: Vec::new(),
|
||||
pos: 0,
|
||||
done: false,
|
||||
compression_algorithm,
|
||||
block_size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> AsyncRead for CompressReader<R>
|
||||
where
|
||||
R: AsyncRead + Unpin + Send + Sync,
|
||||
{
|
||||
fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<io::Result<()>> {
|
||||
let mut this = self.project();
|
||||
// If buffer has data, serve from buffer first
|
||||
if *this.pos < this.buffer.len() {
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len() - *this.pos);
|
||||
buf.put_slice(&this.buffer[*this.pos..*this.pos + to_copy]);
|
||||
*this.pos += to_copy;
|
||||
if *this.pos == this.buffer.len() {
|
||||
this.buffer.clear();
|
||||
*this.pos = 0;
|
||||
}
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
|
||||
if *this.done {
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
|
||||
// Read from inner, only read block_size bytes each time
|
||||
let mut temp = vec![0u8; *this.block_size];
|
||||
let mut temp_buf = ReadBuf::new(&mut temp);
|
||||
match this.inner.as_mut().poll_read(cx, &mut temp_buf) {
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(Ok(())) => {
|
||||
let n = temp_buf.filled().len();
|
||||
if n == 0 {
|
||||
// EOF, write end header
|
||||
let mut header = [0u8; 8];
|
||||
header[0] = 0xFF;
|
||||
*this.buffer = header.to_vec();
|
||||
*this.pos = 0;
|
||||
*this.done = true;
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len());
|
||||
buf.put_slice(&this.buffer[..to_copy]);
|
||||
*this.pos += to_copy;
|
||||
Poll::Ready(Ok(()))
|
||||
} else {
|
||||
let uncompressed_data = &temp_buf.filled()[..n];
|
||||
|
||||
let crc = crc32fast::hash(uncompressed_data);
|
||||
let compressed_data = compress_block(uncompressed_data, *this.compression_algorithm);
|
||||
|
||||
let uncompressed_len = n;
|
||||
let compressed_len = compressed_data.len();
|
||||
let int_len = put_uvarint_len(uncompressed_len as u64);
|
||||
|
||||
let len = compressed_len + int_len + 4; // 4 bytes for CRC32
|
||||
|
||||
// Header: 8 bytes
|
||||
// 0: type (0 = compressed, 1 = uncompressed, 0xFF = end)
|
||||
// 1-3: length (little endian u24)
|
||||
// 4-7: crc32 (little endian u32)
|
||||
let mut header = [0u8; 8];
|
||||
header[0] = 0x00; // 0 = compressed
|
||||
header[1] = (len & 0xFF) as u8;
|
||||
header[2] = ((len >> 8) & 0xFF) as u8;
|
||||
header[3] = ((len >> 16) & 0xFF) as u8;
|
||||
header[4] = (crc & 0xFF) as u8;
|
||||
header[5] = ((crc >> 8) & 0xFF) as u8;
|
||||
header[6] = ((crc >> 16) & 0xFF) as u8;
|
||||
header[7] = ((crc >> 24) & 0xFF) as u8;
|
||||
|
||||
// Combine the 8-byte header + uvarint uncompressed length + compressed data
|
||||
let mut out = Vec::with_capacity(len + 4);
|
||||
out.extend_from_slice(&header);
|
||||
|
||||
let mut uncompressed_len_buf = vec![0u8; int_len];
|
||||
put_uvarint(&mut uncompressed_len_buf, uncompressed_len as u64);
|
||||
out.extend_from_slice(&uncompressed_len_buf);
|
||||
|
||||
out.extend_from_slice(&compressed_data);
|
||||
|
||||
*this.buffer = out;
|
||||
*this.pos = 0;
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len());
|
||||
buf.put_slice(&this.buffer[..to_copy]);
|
||||
*this.pos += to_copy;
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
}
|
||||
Poll::Ready(Err(e)) => Poll::Ready(Err(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> EtagResolvable for CompressReader<R>
|
||||
where
|
||||
R: EtagResolvable,
|
||||
{
|
||||
fn try_resolve_etag(&mut self) -> Option<String> {
|
||||
self.inner.try_resolve_etag()
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> HashReaderDetector for CompressReader<R>
|
||||
where
|
||||
R: HashReaderDetector,
|
||||
{
|
||||
fn is_hash_reader(&self) -> bool {
|
||||
self.inner.is_hash_reader()
|
||||
}
|
||||
|
||||
fn as_hash_reader_mut(&mut self) -> Option<&mut dyn HashReaderMut> {
|
||||
self.inner.as_hash_reader_mut()
|
||||
}
|
||||
}
|
||||
|
||||
pin_project! {
|
||||
/// A reader wrapper that decompresses blocks framed by `CompressReader`.
// Frame header layout (8 bytes):
// byte 0:    block type (0x00 = compressed, 0x01 = uncompressed, 0xFF = end of stream)
// bytes 1-3: little-endian u24 length of the block body that follows (uvarint uncompressed length + compressed data), plus 4
// bytes 4-7: little-endian u32 CRC32 of the uncompressed data
|
||||
#[derive(Debug)]
|
||||
pub struct DecompressReader<R> {
|
||||
#[pin]
|
||||
pub inner: R,
|
||||
buffer: Vec<u8>,
|
||||
buffer_pos: usize,
|
||||
finished: bool,
|
||||
// New fields for saving header read progress across polls
|
||||
header_buf: [u8; 8],
|
||||
header_read: usize,
|
||||
header_done: bool,
|
||||
// New fields for saving compressed block read progress across polls
|
||||
compressed_buf: Option<Vec<u8>>,
|
||||
compressed_read: usize,
|
||||
compressed_len: usize,
|
||||
compression_algorithm: CompressionAlgorithm,
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> DecompressReader<R>
|
||||
where
|
||||
R: Reader,
|
||||
{
|
||||
pub fn new(inner: R, compression_algorithm: CompressionAlgorithm) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
buffer: Vec::new(),
|
||||
buffer_pos: 0,
|
||||
finished: false,
|
||||
header_buf: [0u8; 8],
|
||||
header_read: 0,
|
||||
header_done: false,
|
||||
compressed_buf: None,
|
||||
compressed_read: 0,
|
||||
compressed_len: 0,
|
||||
compression_algorithm,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> AsyncRead for DecompressReader<R>
|
||||
where
|
||||
R: AsyncRead + Unpin + Send + Sync,
|
||||
{
|
||||
fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<io::Result<()>> {
|
||||
let mut this = self.project();
|
||||
// Serve from buffer if any
|
||||
if *this.buffer_pos < this.buffer.len() {
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len() - *this.buffer_pos);
|
||||
buf.put_slice(&this.buffer[*this.buffer_pos..*this.buffer_pos + to_copy]);
|
||||
*this.buffer_pos += to_copy;
|
||||
if *this.buffer_pos == this.buffer.len() {
|
||||
this.buffer.clear();
|
||||
*this.buffer_pos = 0;
|
||||
}
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
|
||||
if *this.finished {
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
|
||||
// Read header, support saving progress across polls
|
||||
while !*this.header_done && *this.header_read < 8 {
|
||||
let mut temp = [0u8; 8];
|
||||
let mut temp_buf = ReadBuf::new(&mut temp[0..8 - *this.header_read]);
|
||||
match this.inner.as_mut().poll_read(cx, &mut temp_buf) {
|
||||
Poll::Pending => return Poll::Pending,
|
||||
Poll::Ready(Ok(())) => {
|
||||
let n = temp_buf.filled().len();
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
this.header_buf[*this.header_read..*this.header_read + n].copy_from_slice(&temp_buf.filled()[..n]);
|
||||
*this.header_read += n;
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
return Poll::Ready(Err(e));
|
||||
}
|
||||
}
|
||||
if *this.header_read < 8 {
|
||||
// Header not fully read, return Pending or Ok, wait for next poll
|
||||
return Poll::Pending;
|
||||
}
|
||||
}
|
||||
|
||||
let typ = this.header_buf[0];
|
||||
let len = (this.header_buf[1] as usize) | ((this.header_buf[2] as usize) << 8) | ((this.header_buf[3] as usize) << 16);
|
||||
let crc = (this.header_buf[4] as u32)
|
||||
| ((this.header_buf[5] as u32) << 8)
|
||||
| ((this.header_buf[6] as u32) << 16)
|
||||
| ((this.header_buf[7] as u32) << 24);
|
||||
|
||||
// Header is used up, reset header_read
|
||||
*this.header_read = 0;
|
||||
*this.header_done = true;
|
||||
|
||||
if typ == 0xFF {
|
||||
*this.finished = true;
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
|
||||
// Save compressed block read progress across polls
|
||||
if this.compressed_buf.is_none() {
|
||||
*this.compressed_len = len - 4;
|
||||
*this.compressed_buf = Some(vec![0u8; *this.compressed_len]);
|
||||
*this.compressed_read = 0;
|
||||
}
|
||||
let compressed_buf = this.compressed_buf.as_mut().unwrap();
|
||||
while *this.compressed_read < *this.compressed_len {
|
||||
let mut temp_buf = ReadBuf::new(&mut compressed_buf[*this.compressed_read..]);
|
||||
match this.inner.as_mut().poll_read(cx, &mut temp_buf) {
|
||||
Poll::Pending => return Poll::Pending,
|
||||
Poll::Ready(Ok(())) => {
|
||||
let n = temp_buf.filled().len();
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
*this.compressed_read += n;
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
return Poll::Ready(Err(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// After reading all, unpack
|
||||
let (uncompress_len, uvarint_len) = uvarint(&compressed_buf[..compressed_buf.len().min(16)]);
let compressed_data = &compressed_buf[uvarint_len as usize..];
|
||||
let decompressed = if typ == 0x00 {
|
||||
match decompress_block(compressed_data, *this.compression_algorithm) {
|
||||
Ok(out) => out,
|
||||
Err(e) => {
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
return Poll::Ready(Err(e));
|
||||
}
|
||||
}
|
||||
} else if typ == 0x01 {
|
||||
compressed_data.to_vec()
|
||||
} else if typ == 0xFF {
|
||||
// Handle end marker
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
*this.finished = true;
|
||||
return Poll::Ready(Ok(()));
|
||||
} else {
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
return Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Unknown compression type")));
|
||||
};
|
||||
if decompressed.len() != uncompress_len as usize {
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
return Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Decompressed length mismatch")));
|
||||
}
|
||||
|
||||
let actual_crc = crc32fast::hash(&decompressed);
|
||||
if actual_crc != crc {
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
return Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "CRC32 mismatch")));
|
||||
}
|
||||
*this.buffer = decompressed;
|
||||
*this.buffer_pos = 0;
|
||||
// Clear compressed block state for next block
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
*this.header_done = false;
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len());
|
||||
buf.put_slice(&this.buffer[..to_copy]);
|
||||
*this.buffer_pos += to_copy;
|
||||
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> EtagResolvable for DecompressReader<R>
|
||||
where
|
||||
R: EtagResolvable,
|
||||
{
|
||||
fn try_resolve_etag(&mut self) -> Option<String> {
|
||||
self.inner.try_resolve_etag()
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> HashReaderDetector for DecompressReader<R>
|
||||
where
|
||||
R: HashReaderDetector,
|
||||
{
|
||||
fn is_hash_reader(&self) -> bool {
|
||||
self.inner.is_hash_reader()
|
||||
}
|
||||
fn as_hash_reader_mut(&mut self) -> Option<&mut dyn HashReaderMut> {
|
||||
self.inner.as_hash_reader_mut()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::io::Cursor;
|
||||
use tokio::io::{AsyncReadExt, BufReader};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compress_reader_basic() {
|
||||
let data = b"hello world, hello world, hello world!";
|
||||
let reader = Cursor::new(&data[..]);
|
||||
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Gzip);
|
||||
|
||||
let mut compressed = Vec::new();
|
||||
compress_reader.read_to_end(&mut compressed).await.unwrap();
|
||||
|
||||
// Unpack with DecompressReader
|
||||
let mut decompress_reader = DecompressReader::new(Cursor::new(compressed.clone()), CompressionAlgorithm::Gzip);
|
||||
let mut decompressed = Vec::new();
|
||||
decompress_reader.read_to_end(&mut decompressed).await.unwrap();
|
||||
|
||||
assert_eq!(&decompressed, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compress_reader_basic_deflate() {
|
||||
let data = b"hello world, hello world, hello world!";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Deflate);
|
||||
|
||||
let mut compressed = Vec::new();
|
||||
compress_reader.read_to_end(&mut compressed).await.unwrap();
|
||||
|
||||
// Unpack with DecompressReader
|
||||
let mut decompress_reader = DecompressReader::new(Cursor::new(compressed.clone()), CompressionAlgorithm::Deflate);
|
||||
let mut decompressed = Vec::new();
|
||||
decompress_reader.read_to_end(&mut decompressed).await.unwrap();
|
||||
|
||||
assert_eq!(&decompressed, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compress_reader_empty() {
|
||||
let data = b"";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Gzip);
|
||||
|
||||
let mut compressed = Vec::new();
|
||||
compress_reader.read_to_end(&mut compressed).await.unwrap();
|
||||
|
||||
let mut decompress_reader = DecompressReader::new(Cursor::new(compressed.clone()), CompressionAlgorithm::Gzip);
|
||||
let mut decompressed = Vec::new();
|
||||
decompress_reader.read_to_end(&mut decompressed).await.unwrap();
|
||||
|
||||
assert_eq!(&decompressed, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compress_reader_large() {
|
||||
use rand::Rng;
|
||||
// Generate 1MB of random bytes
|
||||
let mut data = vec![0u8; 1024 * 1024];
|
||||
rand::thread_rng().fill(&mut data[..]);
|
||||
let reader = Cursor::new(data.clone());
|
||||
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Gzip);
|
||||
|
||||
let mut compressed = Vec::new();
|
||||
compress_reader.read_to_end(&mut compressed).await.unwrap();
|
||||
|
||||
let mut decompress_reader = DecompressReader::new(Cursor::new(compressed.clone()), CompressionAlgorithm::Gzip);
|
||||
let mut decompressed = Vec::new();
|
||||
decompress_reader.read_to_end(&mut decompressed).await.unwrap();
|
||||
|
||||
assert_eq!(&decompressed, &data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compress_reader_large_deflate() {
|
||||
use rand::Rng;
|
||||
// Generate 1MB of random bytes
|
||||
let mut data = vec![0u8; 1024 * 1024];
|
||||
rand::thread_rng().fill(&mut data[..]);
|
||||
let reader = Cursor::new(data.clone());
|
||||
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Deflate);
|
||||
|
||||
let mut compressed = Vec::new();
|
||||
compress_reader.read_to_end(&mut compressed).await.unwrap();
|
||||
|
||||
let mut decompress_reader = DecompressReader::new(Cursor::new(compressed.clone()), CompressionAlgorithm::Deflate);
|
||||
let mut decompressed = Vec::new();
|
||||
decompress_reader.read_to_end(&mut decompressed).await.unwrap();
|
||||
|
||||
assert_eq!(&decompressed, &data);
|
||||
}
|
||||
}
|
||||
424
crates/rio/src/encrypt_reader.rs
Normal file
@@ -0,0 +1,424 @@
|
||||
use crate::HashReaderDetector;
|
||||
use crate::HashReaderMut;
|
||||
use crate::{EtagResolvable, Reader};
|
||||
use aes_gcm::aead::Aead;
|
||||
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
|
||||
use pin_project_lite::pin_project;
|
||||
use rustfs_utils::{put_uvarint, put_uvarint_len};
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::{AsyncRead, ReadBuf};
|
||||
|
||||
pin_project! {
|
||||
/// A reader wrapper that encrypts data on the fly using AES-256-GCM.
|
||||
/// This is a demonstration. For production, use a secure and audited crypto library.
|
||||
#[derive(Debug)]
|
||||
pub struct EncryptReader<R> {
|
||||
#[pin]
|
||||
pub inner: R,
|
||||
key: [u8; 32], // AES-256-GCM key
|
||||
nonce: [u8; 12], // 96-bit nonce for GCM
|
||||
buffer: Vec<u8>,
|
||||
buffer_pos: usize,
|
||||
finished: bool,
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> EncryptReader<R>
|
||||
where
|
||||
R: Reader,
|
||||
{
|
||||
pub fn new(inner: R, key: [u8; 32], nonce: [u8; 12]) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
key,
|
||||
nonce,
|
||||
buffer: Vec::new(),
|
||||
buffer_pos: 0,
|
||||
finished: false,
|
||||
}
|
||||
}
|
||||
}
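// Typical round trip (a sketch; see the tests at the bottom of this file):
// wrap any reader with `EncryptReader::new(inner, key, nonce)` and read the
// framed ciphertext out of it, then wrap those bytes with
// `DecryptReader::new(inner, key, nonce)` using the same key/nonce pair to
// recover the plaintext.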
|
||||
|
||||
impl<R> AsyncRead for EncryptReader<R>
|
||||
where
|
||||
R: AsyncRead + Unpin + Send + Sync,
|
||||
{
|
||||
fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<std::io::Result<()>> {
|
||||
let mut this = self.project();
|
||||
// Serve from buffer if any
|
||||
if *this.buffer_pos < this.buffer.len() {
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len() - *this.buffer_pos);
|
||||
buf.put_slice(&this.buffer[*this.buffer_pos..*this.buffer_pos + to_copy]);
|
||||
*this.buffer_pos += to_copy;
|
||||
if *this.buffer_pos == this.buffer.len() {
|
||||
this.buffer.clear();
|
||||
*this.buffer_pos = 0;
|
||||
}
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
if *this.finished {
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
// Read a fixed block size from inner
|
||||
let block_size = 8 * 1024;
|
||||
let mut temp = vec![0u8; block_size];
|
||||
let mut temp_buf = ReadBuf::new(&mut temp);
|
||||
match this.inner.as_mut().poll_read(cx, &mut temp_buf) {
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(Ok(())) => {
|
||||
let n = temp_buf.filled().len();
|
||||
if n == 0 {
|
||||
// EOF, write end header
|
||||
let mut header = [0u8; 8];
|
||||
header[0] = 0xFF; // type: end
|
||||
*this.buffer = header.to_vec();
|
||||
*this.buffer_pos = 0;
|
||||
*this.finished = true;
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len());
|
||||
buf.put_slice(&this.buffer[..to_copy]);
|
||||
*this.buffer_pos += to_copy;
|
||||
Poll::Ready(Ok(()))
|
||||
} else {
|
||||
// Encrypt the chunk
|
||||
let cipher = Aes256Gcm::new_from_slice(this.key).expect("key");
|
||||
let nonce = Nonce::from_slice(this.nonce);
|
||||
let plaintext = &temp_buf.filled()[..n];
|
||||
let plaintext_len = plaintext.len();
|
||||
let crc = crc32fast::hash(plaintext);
|
||||
let ciphertext = cipher
|
||||
.encrypt(nonce, plaintext)
|
||||
.map_err(|e| std::io::Error::other(format!("encrypt error: {e}")))?;
|
||||
let int_len = put_uvarint_len(plaintext_len as u64);
|
||||
let clen = int_len + ciphertext.len() + 4;
|
||||
// Header: 8 bytes
|
||||
// 0: type (0 = encrypted, 0xFF = end)
|
||||
// 1-3: payload length (little endian u24) = uvarint length + ciphertext length + 4
// 4-7: CRC32 of the plaintext (little endian u32)
|
||||
let mut header = [0u8; 8];
|
||||
header[0] = 0x00; // 0 = encrypted
|
||||
header[1] = (clen & 0xFF) as u8;
|
||||
header[2] = ((clen >> 8) & 0xFF) as u8;
|
||||
header[3] = ((clen >> 16) & 0xFF) as u8;
|
||||
header[4] = (crc & 0xFF) as u8;
|
||||
header[5] = ((crc >> 8) & 0xFF) as u8;
|
||||
header[6] = ((crc >> 16) & 0xFF) as u8;
|
||||
header[7] = ((crc >> 24) & 0xFF) as u8;
|
||||
let mut out = Vec::with_capacity(8 + int_len + ciphertext.len());
|
||||
out.extend_from_slice(&header);
|
||||
let mut plaintext_len_buf = vec![0u8; int_len];
|
||||
put_uvarint(&mut plaintext_len_buf, plaintext_len as u64);
|
||||
out.extend_from_slice(&plaintext_len_buf);
|
||||
out.extend_from_slice(&ciphertext);
|
||||
*this.buffer = out;
|
||||
*this.buffer_pos = 0;
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len());
|
||||
buf.put_slice(&this.buffer[..to_copy]);
|
||||
*this.buffer_pos += to_copy;
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
}
|
||||
Poll::Ready(Err(e)) => Poll::Ready(Err(e)),
|
||||
}
|
||||
}
|
||||
}
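// Frame layout produced by `EncryptReader` above and parsed back by
// `DecryptReader` below (a description of the code as written, not a formal spec):
//
//   [ 8-byte header ][ uvarint(plaintext_len) ][ ciphertext + GCM tag ]
//
//   header[0]    block type: 0x00 = encrypted data, 0xFF = end of stream
//   header[1..4] little-endian u24 = uvarint length + ciphertext length + 4
//   header[4..8] little-endian u32 CRC32 of the plaintext
//
// The same key/nonce pair is reused for every block, which is only acceptable
// for the demonstration purpose stated in the struct docs.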
|
||||
|
||||
impl<R> EtagResolvable for EncryptReader<R>
|
||||
where
|
||||
R: EtagResolvable,
|
||||
{
|
||||
fn try_resolve_etag(&mut self) -> Option<String> {
|
||||
self.inner.try_resolve_etag()
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> HashReaderDetector for EncryptReader<R>
|
||||
where
|
||||
R: EtagResolvable + HashReaderDetector,
|
||||
{
|
||||
fn is_hash_reader(&self) -> bool {
|
||||
self.inner.is_hash_reader()
|
||||
}
|
||||
|
||||
fn as_hash_reader_mut(&mut self) -> Option<&mut dyn HashReaderMut> {
|
||||
self.inner.as_hash_reader_mut()
|
||||
}
|
||||
}
|
||||
|
||||
pin_project! {
|
||||
/// A reader wrapper that decrypts data on the fly using AES-256-GCM.
|
||||
/// This is a demonstration. For production, use a secure and audited crypto library.
|
||||
#[derive(Debug)]
|
||||
pub struct DecryptReader<R> {
|
||||
#[pin]
|
||||
pub inner: R,
|
||||
key: [u8; 32], // AES-256-GCM key
|
||||
nonce: [u8; 12], // 96-bit nonce for GCM
|
||||
buffer: Vec<u8>,
|
||||
buffer_pos: usize,
|
||||
finished: bool,
|
||||
// For block framing
|
||||
header_buf: [u8; 8],
|
||||
header_read: usize,
|
||||
header_done: bool,
|
||||
ciphertext_buf: Option<Vec<u8>>,
|
||||
ciphertext_read: usize,
|
||||
ciphertext_len: usize,
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> DecryptReader<R>
|
||||
where
|
||||
R: Reader,
|
||||
{
|
||||
pub fn new(inner: R, key: [u8; 32], nonce: [u8; 12]) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
key,
|
||||
nonce,
|
||||
buffer: Vec::new(),
|
||||
buffer_pos: 0,
|
||||
finished: false,
|
||||
header_buf: [0u8; 8],
|
||||
header_read: 0,
|
||||
header_done: false,
|
||||
ciphertext_buf: None,
|
||||
ciphertext_read: 0,
|
||||
ciphertext_len: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> AsyncRead for DecryptReader<R>
|
||||
where
|
||||
R: AsyncRead + Unpin + Send + Sync,
|
||||
{
|
||||
fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<std::io::Result<()>> {
|
||||
let mut this = self.project();
|
||||
// Serve from buffer if any
|
||||
if *this.buffer_pos < this.buffer.len() {
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len() - *this.buffer_pos);
|
||||
buf.put_slice(&this.buffer[*this.buffer_pos..*this.buffer_pos + to_copy]);
|
||||
*this.buffer_pos += to_copy;
|
||||
if *this.buffer_pos == this.buffer.len() {
|
||||
this.buffer.clear();
|
||||
*this.buffer_pos = 0;
|
||||
}
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
if *this.finished {
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
// Read header (8 bytes), support partial header read
|
||||
while !*this.header_done && *this.header_read < 8 {
|
||||
let mut temp = [0u8; 8];
|
||||
let mut temp_buf = ReadBuf::new(&mut temp[0..8 - *this.header_read]);
|
||||
match this.inner.as_mut().poll_read(cx, &mut temp_buf) {
|
||||
Poll::Pending => return Poll::Pending,
|
||||
Poll::Ready(Ok(())) => {
|
||||
let n = temp_buf.filled().len();
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
this.header_buf[*this.header_read..*this.header_read + n].copy_from_slice(&temp_buf.filled()[..n]);
|
||||
*this.header_read += n;
|
||||
}
|
||||
Poll::Ready(Err(e)) => return Poll::Ready(Err(e)),
|
||||
}
|
||||
if *this.header_read < 8 {
|
||||
return Poll::Pending;
|
||||
}
|
||||
}
|
||||
if !*this.header_done && *this.header_read == 8 {
|
||||
*this.header_done = true;
|
||||
}
|
||||
if !*this.header_done {
|
||||
return Poll::Pending;
|
||||
}
|
||||
let typ = this.header_buf[0];
|
||||
let len = (this.header_buf[1] as usize) | ((this.header_buf[2] as usize) << 8) | ((this.header_buf[3] as usize) << 16);
|
||||
let crc = (this.header_buf[4] as u32)
|
||||
| ((this.header_buf[5] as u32) << 8)
|
||||
| ((this.header_buf[6] as u32) << 16)
|
||||
| ((this.header_buf[7] as u32) << 24);
|
||||
*this.header_read = 0;
|
||||
*this.header_done = false;
|
||||
if typ == 0xFF {
|
||||
*this.finished = true;
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
// Read ciphertext block (len bytes), support partial read
|
||||
if this.ciphertext_buf.is_none() {
|
||||
*this.ciphertext_len = len - 4; // the length field is stored with an extra 4 (see EncryptReader), subtract it back
|
||||
*this.ciphertext_buf = Some(vec![0u8; *this.ciphertext_len]);
|
||||
*this.ciphertext_read = 0;
|
||||
}
|
||||
let ciphertext_buf = this.ciphertext_buf.as_mut().unwrap();
|
||||
while *this.ciphertext_read < *this.ciphertext_len {
|
||||
let mut temp_buf = ReadBuf::new(&mut ciphertext_buf[*this.ciphertext_read..]);
|
||||
match this.inner.as_mut().poll_read(cx, &mut temp_buf) {
|
||||
Poll::Pending => return Poll::Pending,
|
||||
Poll::Ready(Ok(())) => {
|
||||
let n = temp_buf.filled().len();
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
*this.ciphertext_read += n;
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
this.ciphertext_buf.take();
|
||||
*this.ciphertext_read = 0;
|
||||
*this.ciphertext_len = 0;
|
||||
return Poll::Ready(Err(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
if *this.ciphertext_read < *this.ciphertext_len {
|
||||
return Poll::Pending;
|
||||
}
|
||||
// Parse uvarint for plaintext length
|
||||
let (plaintext_len, uvarint_len) = rustfs_utils::uvarint(&ciphertext_buf[0..16]);
|
||||
let ciphertext = &ciphertext_buf[uvarint_len as usize..];
|
||||
|
||||
// Decrypt
|
||||
let cipher = Aes256Gcm::new_from_slice(this.key).expect("key");
|
||||
let nonce = Nonce::from_slice(this.nonce);
|
||||
let plaintext = cipher
|
||||
.decrypt(nonce, ciphertext)
|
||||
.map_err(|e| std::io::Error::other(format!("decrypt error: {e}")))?;
|
||||
if plaintext.len() != plaintext_len as usize {
|
||||
this.ciphertext_buf.take();
|
||||
*this.ciphertext_read = 0;
|
||||
*this.ciphertext_len = 0;
|
||||
return Poll::Ready(Err(std::io::Error::other("Plaintext length mismatch")));
|
||||
}
|
||||
// CRC32 check
|
||||
let actual_crc = crc32fast::hash(&plaintext);
|
||||
if actual_crc != crc {
|
||||
this.ciphertext_buf.take();
|
||||
*this.ciphertext_read = 0;
|
||||
*this.ciphertext_len = 0;
|
||||
return Poll::Ready(Err(std::io::Error::other("CRC32 mismatch")));
|
||||
}
|
||||
*this.buffer = plaintext;
|
||||
*this.buffer_pos = 0;
|
||||
// Clear block state for next block
|
||||
this.ciphertext_buf.take();
|
||||
*this.ciphertext_read = 0;
|
||||
*this.ciphertext_len = 0;
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len());
|
||||
buf.put_slice(&this.buffer[..to_copy]);
|
||||
*this.buffer_pos += to_copy;
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> EtagResolvable for DecryptReader<R>
|
||||
where
|
||||
R: EtagResolvable,
|
||||
{
|
||||
fn try_resolve_etag(&mut self) -> Option<String> {
|
||||
self.inner.try_resolve_etag()
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> HashReaderDetector for DecryptReader<R>
|
||||
where
|
||||
R: EtagResolvable + HashReaderDetector,
|
||||
{
|
||||
fn is_hash_reader(&self) -> bool {
|
||||
self.inner.is_hash_reader()
|
||||
}
|
||||
|
||||
fn as_hash_reader_mut(&mut self) -> Option<&mut dyn HashReaderMut> {
|
||||
self.inner.as_hash_reader_mut()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::io::Cursor;
|
||||
|
||||
use super::*;
|
||||
use rand::RngCore;
|
||||
use tokio::io::{AsyncReadExt, BufReader};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_encrypt_decrypt_reader_aes256gcm() {
|
||||
let data = b"hello sse encrypt";
|
||||
let mut key = [0u8; 32];
|
||||
let mut nonce = [0u8; 12];
|
||||
rand::thread_rng().fill_bytes(&mut key);
|
||||
rand::thread_rng().fill_bytes(&mut nonce);
|
||||
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let encrypt_reader = EncryptReader::new(reader, key, nonce);
|
||||
|
||||
// Encrypt
|
||||
let mut encrypt_reader = encrypt_reader;
|
||||
let mut encrypted = Vec::new();
|
||||
encrypt_reader.read_to_end(&mut encrypted).await.unwrap();
|
||||
|
||||
// Decrypt using DecryptReader
|
||||
let reader = Cursor::new(encrypted.clone());
|
||||
let decrypt_reader = DecryptReader::new(reader, key, nonce);
|
||||
let mut decrypt_reader = decrypt_reader;
|
||||
let mut decrypted = Vec::new();
|
||||
decrypt_reader.read_to_end(&mut decrypted).await.unwrap();
|
||||
|
||||
assert_eq!(&decrypted, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_decrypt_reader_only() {
|
||||
// Encrypt some data first
|
||||
let data = b"test decrypt only";
|
||||
let mut key = [0u8; 32];
|
||||
let mut nonce = [0u8; 12];
|
||||
rand::thread_rng().fill_bytes(&mut key);
|
||||
rand::thread_rng().fill_bytes(&mut nonce);
|
||||
|
||||
// Encrypt
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let encrypt_reader = EncryptReader::new(reader, key, nonce);
|
||||
let mut encrypt_reader = encrypt_reader;
|
||||
let mut encrypted = Vec::new();
|
||||
encrypt_reader.read_to_end(&mut encrypted).await.unwrap();
|
||||
|
||||
// Now test DecryptReader
|
||||
|
||||
let reader = Cursor::new(encrypted.clone());
|
||||
let decrypt_reader = DecryptReader::new(reader, key, nonce);
|
||||
let mut decrypt_reader = decrypt_reader;
|
||||
let mut decrypted = Vec::new();
|
||||
decrypt_reader.read_to_end(&mut decrypted).await.unwrap();
|
||||
|
||||
assert_eq!(&decrypted, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_encrypt_decrypt_reader_large() {
|
||||
use rand::Rng;
|
||||
let size = 1024 * 1024;
|
||||
let mut data = vec![0u8; size];
|
||||
rand::thread_rng().fill(&mut data[..]);
|
||||
let mut key = [0u8; 32];
|
||||
let mut nonce = [0u8; 12];
|
||||
rand::thread_rng().fill_bytes(&mut key);
|
||||
rand::thread_rng().fill_bytes(&mut nonce);
|
||||
|
||||
let reader = std::io::Cursor::new(data.clone());
|
||||
let encrypt_reader = EncryptReader::new(reader, key, nonce);
|
||||
let mut encrypt_reader = encrypt_reader;
|
||||
let mut encrypted = Vec::new();
|
||||
encrypt_reader.read_to_end(&mut encrypted).await.unwrap();
|
||||
|
||||
let reader = std::io::Cursor::new(encrypted.clone());
|
||||
let decrypt_reader = DecryptReader::new(reader, key, nonce);
|
||||
let mut decrypt_reader = decrypt_reader;
|
||||
let mut decrypted = Vec::new();
|
||||
decrypt_reader.read_to_end(&mut decrypted).await.unwrap();
|
||||
|
||||
assert_eq!(&decrypted, &data);
|
||||
}
|
||||
}
|
||||
238
crates/rio/src/etag.rs
Normal file
@@ -0,0 +1,238 @@
|
||||
/*!
|
||||
# AsyncRead Wrapper Types with ETag Support
|
||||
|
||||
This module demonstrates a pattern for handling wrapped AsyncRead types where:
|
||||
- Reader types contain the actual ETag capability
|
||||
- Wrapper types need to be recursively unwrapped
|
||||
- The system can handle arbitrary nesting like `CompressReader<EncryptReader<EtagReader<R>>>`
|
||||
|
||||
## Key Components
|
||||
|
||||
### Trait-Based Approach
|
||||
The `EtagResolvable` trait provides a clean way to handle recursive unwrapping:
|
||||
- Reader types implement it by returning their ETag directly
|
||||
- Wrapper types implement it by delegating to their inner type
|
||||
|
||||
## Usage Examples
|
||||
|
||||
```ignore
// Direct usage with the trait-based approach (sketch; `some_boxed_reader`
// is any `Box<dyn Reader>`):
let mut reader = CompressReader::new(
    EtagReader::new(some_boxed_reader, Some("test_etag".to_string())),
    CompressionAlgorithm::Gzip,
);
let etag = resolve_etag_generic(&mut reader);
```
|
||||
*/
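// Minimal sketch of the delegation pattern described above. `DelegatingWrapper`
// is a hypothetical example type, not part of the public API: a wrapper only
// needs to forward `try_resolve_etag` to its inner reader for
// `resolve_etag_generic` to keep working through arbitrary nesting.
#[allow(dead_code)]
struct DelegatingWrapper<R> {
    inner: R,
}

impl<R: crate::EtagResolvable> crate::EtagResolvable for DelegatingWrapper<R> {
    fn try_resolve_etag(&mut self) -> Option<String> {
        // Wrappers never invent an ETag themselves; the EtagReader/HashReader
        // at the core of the stack provides it.
        self.inner.try_resolve_etag()
    }
}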
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use crate::compress::CompressionAlgorithm;
|
||||
use crate::resolve_etag_generic;
|
||||
use crate::{CompressReader, EncryptReader, EtagReader, HashReader};
|
||||
use std::io::Cursor;
|
||||
use tokio::io::BufReader;
|
||||
|
||||
#[test]
|
||||
fn test_etag_reader_resolution() {
|
||||
let data = b"test data";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let mut etag_reader = EtagReader::new(reader, Some("test_etag".to_string()));
|
||||
|
||||
// Test direct ETag resolution
|
||||
assert_eq!(resolve_etag_generic(&mut etag_reader), Some("test_etag".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_hash_reader_resolution() {
|
||||
let data = b"test data";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let mut hash_reader =
|
||||
HashReader::new(reader, data.len() as i64, data.len() as i64, Some("hash_etag".to_string()), false).unwrap();
|
||||
|
||||
// Test HashReader ETag resolution
|
||||
assert_eq!(resolve_etag_generic(&mut hash_reader), Some("hash_etag".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compress_reader_delegation() {
|
||||
let data = b"test data for compression";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let etag_reader = EtagReader::new(reader, Some("compress_etag".to_string()));
|
||||
let mut compress_reader = CompressReader::new(etag_reader, CompressionAlgorithm::Gzip);
|
||||
|
||||
// Test that CompressReader delegates to inner EtagReader
|
||||
assert_eq!(resolve_etag_generic(&mut compress_reader), Some("compress_etag".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encrypt_reader_delegation() {
|
||||
let data = b"test data for encryption";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let etag_reader = EtagReader::new(reader, Some("encrypt_etag".to_string()));
|
||||
|
||||
let key = [0u8; 32];
|
||||
let nonce = [0u8; 12];
|
||||
let mut encrypt_reader = EncryptReader::new(etag_reader, key, nonce);
|
||||
|
||||
// Test that EncryptReader delegates to inner EtagReader
|
||||
assert_eq!(resolve_etag_generic(&mut encrypt_reader), Some("encrypt_etag".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_complex_nesting() {
|
||||
let data = b"test data for complex nesting";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
// Create a complex nested structure: CompressReader<EncryptReader<EtagReader<BufReader<Cursor>>>>
|
||||
let etag_reader = EtagReader::new(reader, Some("nested_etag".to_string()));
|
||||
let key = [0u8; 32];
|
||||
let nonce = [0u8; 12];
|
||||
let encrypt_reader = EncryptReader::new(etag_reader, key, nonce);
|
||||
let mut compress_reader = CompressReader::new(encrypt_reader, CompressionAlgorithm::Gzip);
|
||||
|
||||
// Test that nested structure can resolve ETag
|
||||
assert_eq!(resolve_etag_generic(&mut compress_reader), Some("nested_etag".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_hash_reader_in_nested_structure() {
|
||||
let data = b"test data for hash reader nesting";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
// Create nested structure: CompressReader<HashReader<BufReader<Cursor>>>
|
||||
let hash_reader =
|
||||
HashReader::new(reader, data.len() as i64, data.len() as i64, Some("hash_nested_etag".to_string()), false).unwrap();
|
||||
let mut compress_reader = CompressReader::new(hash_reader, CompressionAlgorithm::Deflate);
|
||||
|
||||
// Test that nested HashReader can be resolved
|
||||
assert_eq!(resolve_etag_generic(&mut compress_reader), Some("hash_nested_etag".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_comprehensive_etag_extraction() {
|
||||
println!("🔍 Testing comprehensive ETag extraction with real reader types...");
|
||||
|
||||
// Test 1: Simple EtagReader
|
||||
let data1 = b"simple test";
|
||||
let reader1 = BufReader::new(Cursor::new(&data1[..]));
|
||||
let reader1 = Box::new(reader1);
|
||||
let mut etag_reader = EtagReader::new(reader1, Some("simple_etag".to_string()));
|
||||
assert_eq!(resolve_etag_generic(&mut etag_reader), Some("simple_etag".to_string()));
|
||||
|
||||
// Test 2: HashReader with ETag
|
||||
let data2 = b"hash test";
|
||||
let reader2 = BufReader::new(Cursor::new(&data2[..]));
|
||||
let reader2 = Box::new(reader2);
|
||||
let mut hash_reader =
|
||||
HashReader::new(reader2, data2.len() as i64, data2.len() as i64, Some("hash_etag".to_string()), false).unwrap();
|
||||
assert_eq!(resolve_etag_generic(&mut hash_reader), Some("hash_etag".to_string()));
|
||||
|
||||
// Test 3: Single wrapper - CompressReader<EtagReader>
|
||||
let data3 = b"compress test";
|
||||
let reader3 = BufReader::new(Cursor::new(&data3[..]));
|
||||
let reader3 = Box::new(reader3);
|
||||
let etag_reader3 = EtagReader::new(reader3, Some("compress_wrapped_etag".to_string()));
|
||||
let mut compress_reader = CompressReader::new(etag_reader3, CompressionAlgorithm::Zstd);
|
||||
assert_eq!(resolve_etag_generic(&mut compress_reader), Some("compress_wrapped_etag".to_string()));
|
||||
|
||||
// Test 4: Double wrapper - CompressReader<EncryptReader<EtagReader>>
|
||||
let data4 = b"double wrap test";
|
||||
let reader4 = BufReader::new(Cursor::new(&data4[..]));
|
||||
let reader4 = Box::new(reader4);
|
||||
let etag_reader4 = EtagReader::new(reader4, Some("double_wrapped_etag".to_string()));
|
||||
let key = [1u8; 32];
|
||||
let nonce = [1u8; 12];
|
||||
let encrypt_reader4 = EncryptReader::new(etag_reader4, key, nonce);
|
||||
let mut compress_reader4 = CompressReader::new(encrypt_reader4, CompressionAlgorithm::Gzip);
|
||||
assert_eq!(resolve_etag_generic(&mut compress_reader4), Some("double_wrapped_etag".to_string()));
|
||||
|
||||
println!("✅ All ETag extraction methods work correctly!");
|
||||
println!("✅ Trait-based approach handles recursive unwrapping!");
|
||||
println!("✅ Complex nesting patterns with real reader types are supported!");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_real_world_scenario() {
|
||||
println!("🔍 Testing real-world ETag extraction scenario with actual reader types...");
|
||||
|
||||
// Simulate a real-world scenario where we have nested AsyncRead wrappers
|
||||
// and need to extract ETag information from deeply nested structures
|
||||
|
||||
let data = b"Real world test data that might be compressed and encrypted";
|
||||
let base_reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let base_reader = Box::new(base_reader);
|
||||
// Create a complex nested structure that might occur in practice:
|
||||
// CompressReader<EncryptReader<HashReader<BufReader<Cursor>>>>
|
||||
let hash_reader = HashReader::new(
|
||||
base_reader,
|
||||
data.len() as i64,
|
||||
data.len() as i64,
|
||||
Some("real_world_etag".to_string()),
|
||||
false,
|
||||
)
|
||||
.unwrap();
|
||||
let key = [42u8; 32];
|
||||
let nonce = [24u8; 12];
|
||||
let encrypt_reader = EncryptReader::new(hash_reader, key, nonce);
|
||||
let mut compress_reader = CompressReader::new(encrypt_reader, CompressionAlgorithm::Deflate);
|
||||
|
||||
// Extract ETag using our generic system
|
||||
let extracted_etag = resolve_etag_generic(&mut compress_reader);
|
||||
println!("📋 Extracted ETag: {:?}", extracted_etag);
|
||||
|
||||
assert_eq!(extracted_etag, Some("real_world_etag".to_string()));
|
||||
|
||||
// Test another complex nesting with EtagReader at the core
|
||||
let data2 = b"Another real world scenario";
|
||||
let base_reader2 = BufReader::new(Cursor::new(&data2[..]));
|
||||
let base_reader2 = Box::new(base_reader2);
|
||||
let etag_reader = EtagReader::new(base_reader2, Some("core_etag".to_string()));
|
||||
let key2 = [99u8; 32];
|
||||
let nonce2 = [88u8; 12];
|
||||
let encrypt_reader2 = EncryptReader::new(etag_reader, key2, nonce2);
|
||||
let mut compress_reader2 = CompressReader::new(encrypt_reader2, CompressionAlgorithm::Zstd);
|
||||
|
||||
let trait_etag = resolve_etag_generic(&mut compress_reader2);
|
||||
println!("📋 Trait-based ETag: {:?}", trait_etag);
|
||||
|
||||
assert_eq!(trait_etag, Some("core_etag".to_string()));
|
||||
|
||||
println!("✅ Real-world scenario test passed!");
|
||||
println!(" - Successfully extracted ETag from nested CompressReader<EncryptReader<HashReader<AsyncRead>>>");
|
||||
println!(" - Successfully extracted ETag from nested CompressReader<EncryptReader<EtagReader<AsyncRead>>>");
|
||||
println!(" - Trait-based approach works with real reader types");
|
||||
println!(" - System handles arbitrary nesting depths with actual implementations");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_no_etag_scenarios() {
|
||||
println!("🔍 Testing scenarios where no ETag is available...");
|
||||
|
||||
// Test with HashReader that has no etag
|
||||
let data = b"no etag test";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let mut hash_reader_no_etag = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap();
|
||||
assert_eq!(resolve_etag_generic(&mut hash_reader_no_etag), None);
|
||||
|
||||
// Test with EtagReader that has None etag
|
||||
let data2 = b"no etag test 2";
|
||||
let reader2 = BufReader::new(Cursor::new(&data2[..]));
|
||||
let reader2 = Box::new(reader2);
|
||||
let mut etag_reader_none = EtagReader::new(reader2, None);
|
||||
assert_eq!(resolve_etag_generic(&mut etag_reader_none), None);
|
||||
|
||||
// Test nested structure with no ETag at the core
|
||||
let data3 = b"nested no etag test";
|
||||
let reader3 = BufReader::new(Cursor::new(&data3[..]));
|
||||
let reader3 = Box::new(reader3);
|
||||
let etag_reader3 = EtagReader::new(reader3, None);
|
||||
let mut compress_reader3 = CompressReader::new(etag_reader3, CompressionAlgorithm::Gzip);
|
||||
assert_eq!(resolve_etag_generic(&mut compress_reader3), None);
|
||||
|
||||
println!("✅ No ETag scenarios handled correctly!");
|
||||
}
|
||||
}
|
||||
220
crates/rio/src/etag_reader.rs
Normal file
@@ -0,0 +1,220 @@
|
||||
use crate::{EtagResolvable, HashReaderDetector, HashReaderMut, Reader};
|
||||
use md5::{Digest, Md5};
|
||||
use pin_project_lite::pin_project;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::{AsyncRead, ReadBuf};
|
||||
|
||||
pin_project! {
|
||||
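/// Wraps a boxed [`Reader`] and computes an MD5 digest of everything read
/// through it. At EOF the digest becomes the ETag; if an expected `checksum`
/// was supplied, a mismatch is reported as an `InvalidData` I/O error.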
pub struct EtagReader {
|
||||
#[pin]
|
||||
pub inner: Box<dyn Reader>,
|
||||
pub md5: Md5,
|
||||
pub finished: bool,
|
||||
pub checksum: Option<String>,
|
||||
}
|
||||
}
|
||||
|
||||
impl EtagReader {
|
||||
pub fn new(inner: Box<dyn Reader>, checksum: Option<String>) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
md5: Md5::new(),
|
||||
finished: false,
|
||||
checksum,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the MD5 digest accumulated so far as a lowercase hex string.
/// Uses a clone of the internal hasher, so it can be called repeatedly;
/// once the stream is finished it always yields the same final value.
|
||||
pub fn get_etag(&mut self) -> String {
|
||||
format!("{:x}", self.md5.clone().finalize())
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncRead for EtagReader {
|
||||
fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<std::io::Result<()>> {
|
||||
let mut this = self.project();
|
||||
let orig_filled = buf.filled().len();
|
||||
let poll = this.inner.as_mut().poll_read(cx, buf);
|
||||
if let Poll::Ready(Ok(())) = &poll {
|
||||
let filled = &buf.filled()[orig_filled..];
|
||||
if !filled.is_empty() {
|
||||
this.md5.update(filled);
|
||||
} else {
|
||||
// EOF
|
||||
*this.finished = true;
|
||||
if let Some(checksum) = this.checksum {
|
||||
let etag = format!("{:x}", this.md5.clone().finalize());
|
||||
if *checksum != etag {
|
||||
return Poll::Ready(Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "Checksum mismatch")));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
poll
|
||||
}
|
||||
}
|
||||
|
||||
impl EtagResolvable for EtagReader {
|
||||
fn is_etag_reader(&self) -> bool {
|
||||
true
|
||||
}
|
||||
fn try_resolve_etag(&mut self) -> Option<String> {
|
||||
// EtagReader provides its own etag, not delegating to inner
|
||||
if let Some(checksum) = &self.checksum {
|
||||
Some(checksum.clone())
|
||||
} else if self.finished {
|
||||
Some(self.get_etag())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl HashReaderDetector for EtagReader {
|
||||
fn is_hash_reader(&self) -> bool {
|
||||
self.inner.is_hash_reader()
|
||||
}
|
||||
|
||||
fn as_hash_reader_mut(&mut self) -> Option<&mut dyn HashReaderMut> {
|
||||
self.inner.as_hash_reader_mut()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::io::Cursor;
|
||||
use tokio::io::{AsyncReadExt, BufReader};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_etag_reader_basic() {
|
||||
let data = b"hello world";
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(data);
|
||||
let expected = format!("{:x}", hasher.finalize());
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let mut etag_reader = EtagReader::new(reader, None);
|
||||
|
||||
let mut buf = Vec::new();
|
||||
let n = etag_reader.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(n, data.len());
|
||||
assert_eq!(&buf, data);
|
||||
|
||||
let etag = etag_reader.try_resolve_etag();
|
||||
assert_eq!(etag, Some(expected));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_etag_reader_empty() {
|
||||
let data = b"";
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(data);
|
||||
let expected = format!("{:x}", hasher.finalize());
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let mut etag_reader = EtagReader::new(reader, None);
|
||||
|
||||
let mut buf = Vec::new();
|
||||
let n = etag_reader.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(n, 0);
|
||||
assert!(buf.is_empty());
|
||||
|
||||
let etag = etag_reader.try_resolve_etag();
|
||||
assert_eq!(etag, Some(expected));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_etag_reader_multiple_get() {
|
||||
let data = b"abc123";
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(data);
|
||||
let expected = format!("{:x}", hasher.finalize());
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let mut etag_reader = EtagReader::new(reader, None);
|
||||
|
||||
let mut buf = Vec::new();
|
||||
let _ = etag_reader.read_to_end(&mut buf).await.unwrap();
|
||||
|
||||
// Call etag multiple times, should always return the same result
|
||||
let etag1 = { etag_reader.try_resolve_etag() };
|
||||
let etag2 = { etag_reader.try_resolve_etag() };
|
||||
assert_eq!(etag1, Some(expected.clone()));
|
||||
assert_eq!(etag2, Some(expected.clone()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_etag_reader_not_finished() {
|
||||
let data = b"abc123";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let mut etag_reader = EtagReader::new(reader, None);
|
||||
|
||||
// Do not read to end, etag should be None
|
||||
let mut buf = [0u8; 2];
|
||||
let _ = etag_reader.read(&mut buf).await.unwrap();
|
||||
assert_eq!(etag_reader.try_resolve_etag(), None);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_etag_reader_large_data() {
|
||||
use rand::Rng;
|
||||
// Generate 3MB random data
|
||||
let size = 3 * 1024 * 1024;
|
||||
let mut data = vec![0u8; size];
|
||||
rand::thread_rng().fill(&mut data[..]);
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(&data);
|
||||
|
||||
let cloned_data = data.clone();
|
||||
|
||||
let expected = format!("{:x}", hasher.finalize());
|
||||
|
||||
let reader = Cursor::new(data.clone());
|
||||
let reader = Box::new(reader);
|
||||
let mut etag_reader = EtagReader::new(reader, None);
|
||||
|
||||
let mut buf = Vec::new();
|
||||
let n = etag_reader.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(n, size);
|
||||
assert_eq!(&buf, &cloned_data);
|
||||
|
||||
let etag = etag_reader.try_resolve_etag();
|
||||
assert_eq!(etag, Some(expected));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_etag_reader_checksum_match() {
|
||||
let data = b"checksum test data";
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(data);
|
||||
let expected = format!("{:x}", hasher.finalize());
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let mut etag_reader = EtagReader::new(reader, Some(expected.clone()));
|
||||
|
||||
let mut buf = Vec::new();
|
||||
let n = etag_reader.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(n, data.len());
|
||||
assert_eq!(&buf, data);
|
||||
// Checksum matches, so the resolved etag should equal `expected`
|
||||
assert_eq!(etag_reader.try_resolve_etag(), Some(expected));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_etag_reader_checksum_mismatch() {
|
||||
let data = b"checksum test data";
|
||||
let wrong_checksum = "deadbeefdeadbeefdeadbeefdeadbeef".to_string();
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let mut etag_reader = EtagReader::new(reader, Some(wrong_checksum));
|
||||
|
||||
let mut buf = Vec::new();
|
||||
// Checksum mismatch should surface as an InvalidData error
|
||||
let err = etag_reader.read_to_end(&mut buf).await.unwrap_err();
|
||||
assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
|
||||
}
|
||||
}
|
||||
134
crates/rio/src/hardlimit_reader.rs
Normal file
@@ -0,0 +1,134 @@
|
||||
use std::io::{Error, Result};
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::{AsyncRead, ReadBuf};
|
||||
|
||||
use crate::{EtagResolvable, HashReaderDetector, HashReaderMut, Reader};
|
||||
|
||||
use pin_project_lite::pin_project;
|
||||
|
||||
pin_project! {
|
||||
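/// Passes reads through to the inner reader but returns an error as soon as
/// more than the configured number of bytes has been produced.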
pub struct HardLimitReader {
|
||||
#[pin]
|
||||
pub inner: Box<dyn Reader>,
|
||||
remaining: i64,
|
||||
}
|
||||
}
|
||||
|
||||
impl HardLimitReader {
|
||||
pub fn new(inner: Box<dyn Reader>, limit: i64) -> Self {
|
||||
HardLimitReader { inner, remaining: limit }
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncRead for HardLimitReader {
|
||||
fn poll_read(mut self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<Result<()>> {
|
||||
if self.remaining < 0 {
|
||||
return Poll::Ready(Err(Error::other("input provided more bytes than specified")));
|
||||
}
|
||||
// Save the initial length
|
||||
let before = buf.filled().len();
|
||||
|
||||
// Poll the inner reader
|
||||
let this = self.as_mut().project();
|
||||
let poll = this.inner.poll_read(cx, buf);
|
||||
|
||||
if let Poll::Ready(Ok(())) = &poll {
|
||||
let after = buf.filled().len();
|
||||
let read = (after - before) as i64;
|
||||
self.remaining -= read;
|
||||
if self.remaining < 0 {
|
||||
return Poll::Ready(Err(Error::other("input provided more bytes than specified")));
|
||||
}
|
||||
}
|
||||
poll
|
||||
}
|
||||
}
|
||||
|
||||
impl EtagResolvable for HardLimitReader {
|
||||
fn try_resolve_etag(&mut self) -> Option<String> {
|
||||
self.inner.try_resolve_etag()
|
||||
}
|
||||
}
|
||||
|
||||
impl HashReaderDetector for HardLimitReader {
|
||||
fn is_hash_reader(&self) -> bool {
|
||||
self.inner.is_hash_reader()
|
||||
}
|
||||
fn as_hash_reader_mut(&mut self) -> Option<&mut dyn HashReaderMut> {
|
||||
self.inner.as_hash_reader_mut()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::vec;
|
||||
|
||||
use super::*;
|
||||
use rustfs_utils::read_full;
|
||||
use tokio::io::{AsyncReadExt, BufReader};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_hardlimit_reader_normal() {
|
||||
let data = b"hello world";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let hardlimit = HardLimitReader::new(reader, 20);
|
||||
let mut r = hardlimit;
|
||||
let mut buf = Vec::new();
|
||||
let n = r.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(n, data.len());
|
||||
assert_eq!(&buf, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_hardlimit_reader_exact_limit() {
|
||||
let data = b"1234567890";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let hardlimit = HardLimitReader::new(reader, 10);
|
||||
let mut r = hardlimit;
|
||||
let mut buf = Vec::new();
|
||||
let n = r.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(n, 10);
|
||||
assert_eq!(&buf, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_hardlimit_reader_exceed_limit() {
|
||||
let data = b"abcdef";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let hardlimit = HardLimitReader::new(reader, 3);
|
||||
let mut r = hardlimit;
|
||||
let mut buf = vec![0u8; 10];
|
||||
// Reading past the limit should return an error
|
||||
let err = match read_full(&mut r, &mut buf).await {
|
||||
Ok(n) => {
|
||||
println!("Read {} bytes", n);
|
||||
assert_eq!(n, 3);
|
||||
assert_eq!(&buf[..n], b"abc");
|
||||
None
|
||||
}
|
||||
Err(e) => Some(e),
|
||||
};
|
||||
|
||||
assert!(err.is_some());
|
||||
|
||||
let err = err.unwrap();
|
||||
assert_eq!(err.kind(), std::io::ErrorKind::Other);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_hardlimit_reader_empty() {
|
||||
let data = b"";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let hardlimit = HardLimitReader::new(reader, 5);
|
||||
let mut r = hardlimit;
|
||||
let mut buf = Vec::new();
|
||||
let n = r.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(n, 0);
|
||||
assert_eq!(&buf, data);
|
||||
}
|
||||
}
|
||||
569
crates/rio/src/hash_reader.rs
Normal file
@@ -0,0 +1,569 @@
|
||||
//! HashReader implementation with reader-wrapping support
|
||||
//!
|
||||
//! This module provides `HashReader`, which wraps any `Box<dyn Reader>` and tracks
//! the declared size, actual size, optional checksum, and the number of bytes read.
|
||||
//!
|
||||
//! ## Migration from the original Reader enum
|
||||
//!
|
||||
//! The original `HashReader::new` method that worked with the `Reader` enum
//! has been replaced with a boxed `dyn Reader` approach. To preserve the original logic:
|
||||
//!
|
||||
//! ### Original logic (before generics):
|
||||
//! ```ignore
|
||||
//! // Original code would do:
|
||||
//! // 1. Check if inner is already a HashReader
|
||||
//! // 2. If size > 0, wrap with HardLimitReader
|
||||
//! // 3. If !diskable_md5, wrap with EtagReader
|
||||
//! // 4. Create HashReader with the wrapped reader
|
||||
//!
|
||||
//! let reader = HashReader::new(inner, size, actual_size, etag, diskable_md5)?;
|
||||
//! ```
|
||||
//!
|
||||
//! ### New approach (boxed readers):
//! ```rust
//! use rustfs_rio::{HashReader, HardLimitReader, EtagReader, Reader};
//! use tokio::io::BufReader;
//! use std::io::Cursor;
//!
//! # tokio_test::block_on(async {
//! let data = b"hello world";
//! let reader = Box::new(BufReader::new(Cursor::new(&data[..])));
//! let size = data.len() as i64;
//! let actual_size = size;
//! let etag: Option<String> = None;
//! let diskable_md5 = false;
//!
//! // Method 1: Simple creation (recommended for most cases)
//! let hash_reader = HashReader::new(reader, size, actual_size, etag.clone(), diskable_md5).unwrap();
//!
//! // Method 2: Manual wrapping that mirrors the original logic
//! let reader2 = Box::new(BufReader::new(Cursor::new(&data[..])));
//! let wrapped_reader: Box<dyn Reader> = if size > 0 {
//!     // Wrap with HardLimitReader, and with EtagReader unless md5 is disabled
//!     let hard_limit = Box::new(HardLimitReader::new(reader2, size));
//!     if !diskable_md5 {
//!         Box::new(EtagReader::new(hard_limit, etag.clone()))
//!     } else {
//!         hard_limit
//!     }
//! } else if !diskable_md5 {
//!     // Only wrap with EtagReader
//!     Box::new(EtagReader::new(reader2, etag.clone()))
//! } else {
//!     // No wrapping needed
//!     reader2
//! };
//! let hash_reader2 = HashReader::new(wrapped_reader, size, actual_size, etag, diskable_md5).unwrap();
//! # });
//! ```
|
||||
//!
|
||||
//! ## HashReader Detection
|
||||
//!
|
||||
//! The `HashReaderDetector` trait allows detection of existing HashReader instances:
|
||||
//!
|
||||
//! ```rust
//! use rustfs_rio::{HashReader, HashReaderDetector};
//! use tokio::io::BufReader;
//! use std::io::Cursor;
//!
//! # tokio_test::block_on(async {
//! let data = b"test";
//! let reader = Box::new(BufReader::new(Cursor::new(&data[..])));
//! let hash_reader = HashReader::new(reader, 4, 4, None, false).unwrap();
//!
//! // Check if a type is a HashReader
//! assert!(hash_reader.is_hash_reader());
//!
//! // An existing HashReader can be passed back through new() for compatibility
//! let result = HashReader::new(Box::new(hash_reader), 4, 4, None, false);
//! assert!(result.is_ok());
//! # });
//! ```
|
||||
|
||||
use pin_project_lite::pin_project;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::{AsyncRead, ReadBuf};
|
||||
|
||||
use crate::{EtagReader, EtagResolvable, HardLimitReader, HashReaderDetector, Reader};
|
||||
|
||||
/// Trait for mutable operations on HashReader
|
||||
pub trait HashReaderMut {
|
||||
fn bytes_read(&self) -> u64;
|
||||
fn checksum(&self) -> &Option<String>;
|
||||
fn set_checksum(&mut self, checksum: Option<String>);
|
||||
fn size(&self) -> i64;
|
||||
fn set_size(&mut self, size: i64);
|
||||
fn actual_size(&self) -> i64;
|
||||
fn set_actual_size(&mut self, actual_size: i64);
|
||||
}
|
||||
|
||||
pin_project! {
|
||||
|
||||
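/// Wraps a boxed [`Reader`] and tracks the declared size, actual size,
/// optional expected checksum, and the number of bytes read so far.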
pub struct HashReader {
|
||||
#[pin]
|
||||
pub inner: Box<dyn Reader>,
|
||||
pub size: i64,
|
||||
checksum: Option<String>,
|
||||
pub actual_size: i64,
|
||||
pub diskable_md5: bool,
|
||||
bytes_read: u64,
|
||||
// TODO: content_hash
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl HashReader {
|
||||
pub fn new(
|
||||
mut inner: Box<dyn Reader>,
|
||||
size: i64,
|
||||
actual_size: i64,
|
||||
md5: Option<String>,
|
||||
diskable_md5: bool,
|
||||
) -> std::io::Result<Self> {
|
||||
// Check if it's already a HashReader and update its parameters
|
||||
if let Some(existing_hash_reader) = inner.as_hash_reader_mut() {
|
||||
if existing_hash_reader.bytes_read() > 0 {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
"Cannot create HashReader from an already read HashReader",
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(checksum) = existing_hash_reader.checksum() {
|
||||
if let Some(ref md5) = md5 {
|
||||
if checksum != md5 {
|
||||
return Err(std::io::Error::new(std::io::ErrorKind::InvalidData, "HashReader checksum mismatch"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if existing_hash_reader.size() > 0 && size > 0 && existing_hash_reader.size() != size {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!("HashReader size mismatch: expected {}, got {}", existing_hash_reader.size(), size),
|
||||
));
|
||||
}
|
||||
|
||||
existing_hash_reader.set_checksum(md5.clone());
|
||||
|
||||
if existing_hash_reader.size() < 0 && size >= 0 {
|
||||
existing_hash_reader.set_size(size);
|
||||
}
|
||||
|
||||
if existing_hash_reader.actual_size() <= 0 && actual_size >= 0 {
|
||||
existing_hash_reader.set_actual_size(actual_size);
|
||||
}
|
||||
|
||||
return Ok(Self {
|
||||
inner,
|
||||
size,
|
||||
checksum: md5,
|
||||
actual_size,
|
||||
diskable_md5,
|
||||
bytes_read: 0,
|
||||
});
|
||||
}
|
||||
|
||||
if size > 0 {
|
||||
let hr = HardLimitReader::new(inner, size);
|
||||
inner = Box::new(hr);
|
||||
if !diskable_md5 && !inner.is_hash_reader() {
|
||||
let er = EtagReader::new(inner, md5.clone());
|
||||
inner = Box::new(er);
|
||||
}
|
||||
} else if !diskable_md5 {
|
||||
let er = EtagReader::new(inner, md5.clone());
|
||||
inner = Box::new(er);
|
||||
}
|
||||
Ok(Self {
|
||||
inner,
|
||||
size,
|
||||
checksum: md5,
|
||||
actual_size,
|
||||
diskable_md5,
|
||||
bytes_read: 0,
|
||||
})
|
||||
}
|
||||
|
||||
/// Update HashReader parameters
|
||||
pub fn update_params(&mut self, size: i64, actual_size: i64, etag: Option<String>) {
|
||||
if self.size < 0 && size >= 0 {
|
||||
self.size = size;
|
||||
}
|
||||
|
||||
if self.actual_size <= 0 && actual_size > 0 {
|
||||
self.actual_size = actual_size;
|
||||
}
|
||||
|
||||
if etag.is_some() {
|
||||
self.checksum = etag;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn size(&self) -> i64 {
|
||||
self.size
|
||||
}
|
||||
pub fn actual_size(&self) -> i64 {
|
||||
self.actual_size
|
||||
}
|
||||
}
|
||||
|
||||
impl HashReaderMut for HashReader {
|
||||
fn bytes_read(&self) -> u64 {
|
||||
self.bytes_read
|
||||
}
|
||||
|
||||
fn checksum(&self) -> &Option<String> {
|
||||
&self.checksum
|
||||
}
|
||||
|
||||
fn set_checksum(&mut self, checksum: Option<String>) {
|
||||
self.checksum = checksum;
|
||||
}
|
||||
|
||||
fn size(&self) -> i64 {
|
||||
self.size
|
||||
}
|
||||
|
||||
fn set_size(&mut self, size: i64) {
|
||||
self.size = size;
|
||||
}
|
||||
|
||||
fn actual_size(&self) -> i64 {
|
||||
self.actual_size
|
||||
}
|
||||
|
||||
fn set_actual_size(&mut self, actual_size: i64) {
|
||||
self.actual_size = actual_size;
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncRead for HashReader {
|
||||
fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<std::io::Result<()>> {
|
||||
let this = self.project();
|
||||
let poll = this.inner.poll_read(cx, buf);
|
||||
if let Poll::Ready(Ok(())) = &poll {
|
||||
let filled = buf.filled().len();
|
||||
*this.bytes_read += filled as u64;
|
||||
|
||||
if filled == 0 {
|
||||
// EOF
|
||||
// TODO: check content_hash
|
||||
}
|
||||
}
|
||||
poll
|
||||
}
|
||||
}
|
||||
|
||||
impl EtagResolvable for HashReader {
|
||||
fn try_resolve_etag(&mut self) -> Option<String> {
|
||||
if self.diskable_md5 {
|
||||
return None;
|
||||
}
|
||||
if let Some(etag) = self.inner.try_resolve_etag() {
|
||||
return Some(etag);
|
||||
}
|
||||
// If no etag from inner and we have a stored checksum, return it
|
||||
self.checksum.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl HashReaderDetector for HashReader {
|
||||
fn is_hash_reader(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn as_hash_reader_mut(&mut self) -> Option<&mut dyn HashReaderMut> {
|
||||
Some(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{encrypt_reader, DecryptReader};
|
||||
use std::io::Cursor;
|
||||
use tokio::io::{AsyncReadExt, BufReader};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_hashreader_wrapping_logic() {
|
||||
let data = b"hello world";
|
||||
let size = data.len() as i64;
|
||||
let actual_size = size;
|
||||
let etag = None;
|
||||
|
||||
// Test 1: Simple creation
|
||||
let reader1 = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader1 = Box::new(reader1);
|
||||
let hash_reader1 = HashReader::new(reader1, size, actual_size, etag.clone(), false).unwrap();
|
||||
assert_eq!(hash_reader1.size(), size);
|
||||
assert_eq!(hash_reader1.actual_size(), actual_size);
|
||||
|
||||
// Test 2: With HardLimitReader wrapping
|
||||
let reader2 = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader2 = Box::new(reader2);
|
||||
let hard_limit = HardLimitReader::new(reader2, size);
|
||||
let hard_limit = Box::new(hard_limit);
|
||||
let hash_reader2 = HashReader::new(hard_limit, size, actual_size, etag.clone(), false).unwrap();
|
||||
assert_eq!(hash_reader2.size(), size);
|
||||
assert_eq!(hash_reader2.actual_size(), actual_size);
|
||||
|
||||
// Test 3: With EtagReader wrapping
|
||||
let reader3 = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader3 = Box::new(reader3);
|
||||
let etag_reader = EtagReader::new(reader3, etag.clone());
|
||||
let etag_reader = Box::new(etag_reader);
|
||||
let hash_reader3 = HashReader::new(etag_reader, size, actual_size, etag.clone(), false).unwrap();
|
||||
assert_eq!(hash_reader3.size(), size);
|
||||
assert_eq!(hash_reader3.actual_size(), actual_size);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_hashreader_etag_basic() {
|
||||
let data = b"hello hashreader";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let mut hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
let _ = hash_reader.read_to_end(&mut buf).await.unwrap();
|
||||
// Depending on how the inner reader is wrapped, the etag may or may not be available
let _etag = hash_reader.try_resolve_etag();
// Just check that resolving the etag does not error
|
||||
assert_eq!(buf, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_hashreader_diskable_md5() {
|
||||
let data = b"no etag";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let mut hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, true).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
let _ = hash_reader.read_to_end(&mut buf).await.unwrap();
|
||||
// Etag should be None when diskable_md5 is true
|
||||
let etag = hash_reader.try_resolve_etag();
|
||||
assert!(etag.is_none());
|
||||
assert_eq!(buf, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_hashreader_new_logic() {
|
||||
let data = b"test data";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
// Create a HashReader first
|
||||
let hash_reader =
|
||||
HashReader::new(reader, data.len() as i64, data.len() as i64, Some("test_etag".to_string()), false).unwrap();
|
||||
let hash_reader = Box::new(hash_reader);
|
||||
// Now try to create another HashReader from the existing one using new
|
||||
let result = HashReader::new(hash_reader, data.len() as i64, data.len() as i64, Some("test_etag".to_string()), false);
|
||||
|
||||
assert!(result.is_ok());
|
||||
let final_reader = result.unwrap();
|
||||
assert_eq!(final_reader.checksum, Some("test_etag".to_string()));
|
||||
assert_eq!(final_reader.size(), data.len() as i64);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_for_wrapping_readers() {
|
||||
use crate::compress::CompressionAlgorithm;
|
||||
use crate::{CompressReader, DecompressReader};
|
||||
use md5::{Digest, Md5};
|
||||
use rand::Rng;
|
||||
use rand::RngCore;
|
||||
|
||||
// Generate 1MB random data
|
||||
let size = 1024 * 1024;
|
||||
let mut data = vec![0u8; size];
|
||||
rand::thread_rng().fill(&mut data[..]);
|
||||
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(&data);
|
||||
|
||||
let expected = format!("{:x}", hasher.finalize());
|
||||
|
||||
println!("expected: {}", expected);
|
||||
|
||||
let reader = Cursor::new(data.clone());
|
||||
let reader = BufReader::new(reader);
|
||||
|
||||
// Enable compression for this test
|
||||
let is_compress = true;
|
||||
let size = data.len() as i64;
|
||||
let actual_size = data.len() as i64;
|
||||
|
||||
let reader = Box::new(reader);
|
||||
// Create the HashReader
|
||||
let mut hr = HashReader::new(reader, size, actual_size, Some(expected.clone()), false).unwrap();
|
||||
|
||||
// If compression is enabled, compress the data first
|
||||
let compressed_data = if is_compress {
|
||||
let mut compressed_buf = Vec::new();
|
||||
let compress_reader = CompressReader::new(hr, CompressionAlgorithm::Gzip);
|
||||
let mut compress_reader = compress_reader;
|
||||
compress_reader.read_to_end(&mut compressed_buf).await.unwrap();
|
||||
|
||||
println!("Original size: {}, Compressed size: {}", data.len(), compressed_buf.len());
|
||||
|
||||
compressed_buf
|
||||
} else {
|
||||
// Without compression, read the original data directly
|
||||
let mut buf = Vec::new();
|
||||
hr.read_to_end(&mut buf).await.unwrap();
|
||||
buf
|
||||
};
|
||||
|
||||
let mut key = [0u8; 32];
|
||||
let mut nonce = [0u8; 12];
|
||||
rand::thread_rng().fill_bytes(&mut key);
|
||||
rand::thread_rng().fill_bytes(&mut nonce);
|
||||
|
||||
let is_encrypt = true;
|
||||
|
||||
if is_encrypt {
|
||||
// Encrypt the compressed data
|
||||
let encrypt_reader = encrypt_reader::EncryptReader::new(Cursor::new(compressed_data), key, nonce);
|
||||
let mut encrypted_data = Vec::new();
|
||||
let mut encrypt_reader = encrypt_reader;
|
||||
encrypt_reader.read_to_end(&mut encrypted_data).await.unwrap();
|
||||
|
||||
println!("Encrypted size: {}", encrypted_data.len());
|
||||
|
||||
// Decrypt the data
|
||||
let decrypt_reader = DecryptReader::new(Cursor::new(encrypted_data), key, nonce);
|
||||
let mut decrypt_reader = decrypt_reader;
|
||||
let mut decrypted_data = Vec::new();
|
||||
decrypt_reader.read_to_end(&mut decrypted_data).await.unwrap();
|
||||
|
||||
if is_compress {
|
||||
// If compression was used, decompress before comparing
|
||||
let decompress_reader = DecompressReader::new(Cursor::new(decrypted_data), CompressionAlgorithm::Gzip);
|
||||
let mut decompress_reader = decompress_reader;
|
||||
let mut final_data = Vec::new();
|
||||
decompress_reader.read_to_end(&mut final_data).await.unwrap();
|
||||
|
||||
println!("Final decompressed size: {}", final_data.len());
|
||||
assert_eq!(final_data.len() as i64, actual_size);
|
||||
assert_eq!(&final_data, &data);
|
||||
} else {
|
||||
// Without compression, compare the decrypted data directly
|
||||
assert_eq!(decrypted_data.len() as i64, actual_size);
|
||||
assert_eq!(&decrypted_data, &data);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Without encryption, just handle compression/decompression
|
||||
if is_compress {
|
||||
let decompress_reader = DecompressReader::new(Cursor::new(compressed_data), CompressionAlgorithm::Gzip);
|
||||
let mut decompress_reader = decompress_reader;
|
||||
let mut decompressed = Vec::new();
|
||||
decompress_reader.read_to_end(&mut decompressed).await.unwrap();
|
||||
|
||||
assert_eq!(decompressed.len() as i64, actual_size);
|
||||
assert_eq!(&decompressed, &data);
|
||||
} else {
|
||||
assert_eq!(compressed_data.len() as i64, actual_size);
|
||||
assert_eq!(&compressed_data, &data);
|
||||
}
|
||||
|
||||
// Verify the etag (note: compression changes the bytes, so etag verification here may need adjustment)
|
||||
println!(
|
||||
"Test completed successfully with compression: {}, encryption: {}",
|
||||
is_compress, is_encrypt
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compression_with_compressible_data() {
|
||||
use crate::compress::CompressionAlgorithm;
|
||||
use crate::{CompressReader, DecompressReader};
|
||||
|
||||
// Create highly compressible data (repeated pattern)
|
||||
let pattern = b"Hello, World! This is a test pattern that should compress well. ";
|
||||
let repeat_count = 16384; // 16K repetitions
|
||||
let mut data = Vec::new();
|
||||
for _ in 0..repeat_count {
|
||||
data.extend_from_slice(pattern);
|
||||
}
|
||||
|
||||
println!("Original data size: {} bytes", data.len());
|
||||
|
||||
let reader = BufReader::new(Cursor::new(data.clone()));
|
||||
let reader = Box::new(reader);
|
||||
let hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap();
|
||||
|
||||
// Test compression
|
||||
let compress_reader = CompressReader::new(hash_reader, CompressionAlgorithm::Gzip);
|
||||
let mut compressed_data = Vec::new();
|
||||
let mut compress_reader = compress_reader;
|
||||
compress_reader.read_to_end(&mut compressed_data).await.unwrap();
|
||||
|
||||
println!("Compressed data size: {} bytes", compressed_data.len());
|
||||
println!("Compression ratio: {:.2}%", (compressed_data.len() as f64 / data.len() as f64) * 100.0);
|
||||
|
||||
// Verify compression actually reduced size for this compressible data
|
||||
assert!(compressed_data.len() < data.len(), "Compression should reduce size for repetitive data");
|
||||
|
||||
// Test decompression
|
||||
let decompress_reader = DecompressReader::new(Cursor::new(compressed_data), CompressionAlgorithm::Gzip);
|
||||
let mut decompressed_data = Vec::new();
|
||||
let mut decompress_reader = decompress_reader;
|
||||
decompress_reader.read_to_end(&mut decompressed_data).await.unwrap();
|
||||
|
||||
// Verify decompressed data matches original
|
||||
assert_eq!(decompressed_data.len(), data.len());
|
||||
assert_eq!(&decompressed_data, &data);
|
||||
|
||||
println!("Compression/decompression test passed successfully!");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compression_algorithms() {
|
||||
use crate::compress::CompressionAlgorithm;
|
||||
use crate::{CompressReader, DecompressReader};
|
||||
|
||||
let data = b"This is test data for compression algorithm testing. ".repeat(1000);
|
||||
println!("Testing with {} bytes of data", data.len());
|
||||
|
||||
let algorithms = vec![
|
||||
CompressionAlgorithm::Gzip,
|
||||
CompressionAlgorithm::Deflate,
|
||||
CompressionAlgorithm::Zstd,
|
||||
];
|
||||
|
||||
for algorithm in algorithms {
|
||||
println!("\nTesting algorithm: {:?}", algorithm);
|
||||
|
||||
let reader = BufReader::new(Cursor::new(data.clone()));
|
||||
let reader = Box::new(reader);
|
||||
let hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap();
|
||||
|
||||
// Compress
|
||||
let compress_reader = CompressReader::new(hash_reader, algorithm);
|
||||
let mut compressed_data = Vec::new();
|
||||
let mut compress_reader = compress_reader;
|
||||
compress_reader.read_to_end(&mut compressed_data).await.unwrap();
|
||||
|
||||
println!(
|
||||
" Compressed size: {} bytes (ratio: {:.2}%)",
|
||||
compressed_data.len(),
|
||||
(compressed_data.len() as f64 / data.len() as f64) * 100.0
|
||||
);
|
||||
|
||||
// Decompress
|
||||
let decompress_reader = DecompressReader::new(Cursor::new(compressed_data), algorithm);
|
||||
let mut decompressed_data = Vec::new();
|
||||
let mut decompress_reader = decompress_reader;
|
||||
decompress_reader.read_to_end(&mut decompressed_data).await.unwrap();
|
||||
|
||||
// Verify
|
||||
assert_eq!(decompressed_data.len(), data.len());
|
||||
assert_eq!(&decompressed_data, &data);
|
||||
println!(" ✓ Algorithm {:?} test passed", algorithm);
|
||||
}
|
||||
}
|
||||
}
|
||||
429
crates/rio/src/http_reader.rs
Normal file
@@ -0,0 +1,429 @@
use bytes::Bytes;
use futures::{Stream, StreamExt};
use http::HeaderMap;
use pin_project_lite::pin_project;
use reqwest::{Client, Method, RequestBuilder};
use std::io::{self, Error};
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, DuplexStream, ReadBuf};
use tokio::sync::{mpsc, oneshot};

use crate::{EtagResolvable, HashReaderDetector, HashReaderMut};

static HTTP_DEBUG_LOG: bool = false;

#[inline(always)]
fn http_debug_log(args: std::fmt::Arguments) {
    if HTTP_DEBUG_LOG {
        println!("{}", args);
    }
}

macro_rules! http_log {
    ($($arg:tt)*) => {
        http_debug_log(format_args!($($arg)*));
    };
}

pin_project! {
    pub struct HttpReader {
        url: String,
        method: Method,
        headers: HeaderMap,
        inner: DuplexStream,
        err_rx: oneshot::Receiver<std::io::Error>,
    }
}

impl HttpReader {
    pub async fn new(url: String, method: Method, headers: HeaderMap) -> io::Result<Self> {
        http_log!("[HttpReader::new] url: {url}, method: {method:?}, headers: {headers:?}");
        Self::with_capacity(url, method, headers, 0).await
    }

    /// Create a new HttpReader from a URL. The request is performed immediately.
    pub async fn with_capacity(url: String, method: Method, headers: HeaderMap, mut read_buf_size: usize) -> io::Result<Self> {
        http_log!(
            "[HttpReader::with_capacity] url: {url}, method: {method:?}, headers: {headers:?}, buf_size: {}",
            read_buf_size
        );
        // First, check if the connection is available (HEAD)
        let client = Client::new();
        let head_resp = client.head(&url).headers(headers.clone()).send().await;
        match head_resp {
            Ok(resp) => {
                http_log!("[HttpReader::new] HEAD status: {}", resp.status());
                if !resp.status().is_success() {
                    return Err(Error::other(format!("HEAD failed: status {}", resp.status())));
                }
            }
            Err(e) => {
                http_log!("[HttpReader::new] HEAD error: {e}");
                return Err(Error::other(format!("HEAD request failed: {e}")));
            }
        }

        let url_clone = url.clone();
        let method_clone = method.clone();
        let headers_clone = headers.clone();

        if read_buf_size == 0 {
            read_buf_size = 8192; // Default buffer size
        }
        let (rd, mut wd) = tokio::io::duplex(read_buf_size);
        let (err_tx, err_rx) = oneshot::channel::<io::Error>();
        tokio::spawn(async move {
            let client = Client::new();
            let request: RequestBuilder = client.request(method_clone, url_clone).headers(headers_clone);

            let response = request.send().await;
            match response {
                Ok(resp) => {
                    if resp.status().is_success() {
                        let mut stream = resp.bytes_stream();
                        while let Some(chunk) = stream.next().await {
                            match chunk {
                                Ok(data) => {
                                    if let Err(e) = wd.write_all(&data).await {
                                        let _ = err_tx.send(Error::other(format!("HttpReader write error: {}", e)));
                                        break;
                                    }
                                }
                                Err(e) => {
                                    let _ = err_tx.send(Error::other(format!("HttpReader stream error: {}", e)));
                                    break;
                                }
                            }
                        }
                    } else {
                        http_log!("[HttpReader::spawn] HTTP request failed with status: {}", resp.status());
                        let _ = err_tx.send(Error::other(format!(
                            "HttpReader HTTP request failed with non-200 status {}",
                            resp.status()
                        )));
                    }
                }
                Err(e) => {
                    let _ = err_tx.send(Error::other(format!("HttpReader HTTP request error: {}", e)));
                }
            }

            http_log!("[HttpReader::spawn] HTTP request completed, exiting");
        });
        Ok(Self {
            inner: rd,
            err_rx,
            url,
            method,
            headers,
        })
    }

    pub fn url(&self) -> &str {
        &self.url
    }

    pub fn method(&self) -> &Method {
        &self.method
    }

    pub fn headers(&self) -> &HeaderMap {
        &self.headers
    }
}

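// A minimal usage sketch, assuming a reachable endpoint such as the local test server at
// http://127.0.0.1:8081/testfile used by the commented-out tests at the bottom of this
// file: the reader performs a HEAD probe, then streams the response body through the
// internal duplex pipe.
#[allow(dead_code)]
async fn http_reader_usage_sketch() -> io::Result<Vec<u8>> {
    use tokio::io::AsyncReadExt;

    let mut reader = HttpReader::with_capacity(
        "http://127.0.0.1:8081/testfile".to_string(),
        Method::GET,
        HeaderMap::new(),
        8192, // explicit read buffer size; passing 0 falls back to the same default
    )
    .await?;

    let mut body = Vec::new();
    reader.read_to_end(&mut body).await?;
    Ok(body)
}
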
impl AsyncRead for HttpReader {
    fn poll_read(mut self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<std::io::Result<()>> {
        http_log!(
            "[HttpReader::poll_read] url: {}, method: {:?}, buf.remaining: {}",
            self.url,
            self.method,
            buf.remaining()
        );
        // Check for errors from the request
        match Pin::new(&mut self.err_rx).try_recv() {
            Ok(e) => return Poll::Ready(Err(e)),
            Err(oneshot::error::TryRecvError::Empty) => {}
            Err(oneshot::error::TryRecvError::Closed) => {
                // Background task finished without reporting an error; keep draining
                // whatever is left in the duplex buffer below.
                // return Poll::Ready(Err(Error::new(ErrorKind::Other, "HTTP request closed")));
            }
        }
        // Read from the inner stream
        Pin::new(&mut self.inner).poll_read(cx, buf)
    }
}

impl EtagResolvable for HttpReader {
    fn is_etag_reader(&self) -> bool {
        false
    }
    fn try_resolve_etag(&mut self) -> Option<String> {
        None
    }
}

impl HashReaderDetector for HttpReader {
    fn is_hash_reader(&self) -> bool {
        false
    }

    fn as_hash_reader_mut(&mut self) -> Option<&mut dyn HashReaderMut> {
        None
    }
}

struct ReceiverStream {
    receiver: mpsc::Receiver<Option<Bytes>>,
}

impl Stream for ReceiverStream {
    type Item = Result<Bytes, std::io::Error>;
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        let poll = Pin::new(&mut self.receiver).poll_recv(cx);
        match &poll {
            Poll::Ready(Some(Some(ref bytes))) => {
                http_log!("[ReceiverStream] poll_next: got {} bytes", bytes.len());
            }
            Poll::Ready(Some(None)) => {
                http_log!("[ReceiverStream] poll_next: sender shutdown");
            }
            Poll::Ready(None) => {
                http_log!("[ReceiverStream] poll_next: channel closed");
            }
            Poll::Pending => {
                // http_log!("[ReceiverStream] poll_next: pending");
            }
        }
        match poll {
            Poll::Ready(Some(Some(bytes))) => Poll::Ready(Some(Ok(bytes))),
            Poll::Ready(Some(None)) => Poll::Ready(None), // Sender shutdown
            Poll::Ready(None) => Poll::Ready(None),
            Poll::Pending => Poll::Pending,
        }
    }
}

pin_project! {
    pub struct HttpWriter {
        url: String,
        method: Method,
        headers: HeaderMap,
        err_rx: tokio::sync::oneshot::Receiver<std::io::Error>,
        sender: tokio::sync::mpsc::Sender<Option<Bytes>>,
        handle: tokio::task::JoinHandle<std::io::Result<()>>,
        finish: bool,
    }
}

impl HttpWriter {
    /// Create a new HttpWriter for the given URL. The HTTP request is performed in the background.
    pub async fn new(url: String, method: Method, headers: HeaderMap) -> io::Result<Self> {
        http_log!("[HttpWriter::new] url: {url}, method: {method:?}, headers: {headers:?}");
        let url_clone = url.clone();
        let method_clone = method.clone();
        let headers_clone = headers.clone();

        // First, try to write empty data to check if writable
        let client = Client::new();
        let resp = client.put(&url).headers(headers.clone()).body(Vec::new()).send().await;
        match resp {
            Ok(resp) => {
                http_log!("[HttpWriter::new] empty PUT status: {}", resp.status());
                if !resp.status().is_success() {
                    return Err(Error::other(format!("Empty PUT failed: status {}", resp.status())));
                }
            }
            Err(e) => {
                http_log!("[HttpWriter::new] empty PUT error: {e}");
                return Err(Error::other(format!("Empty PUT failed: {e}")));
            }
        }

        let (sender, receiver) = tokio::sync::mpsc::channel::<Option<Bytes>>(8);
        let (err_tx, err_rx) = tokio::sync::oneshot::channel::<io::Error>();

        let handle = tokio::spawn(async move {
            let stream = ReceiverStream { receiver };
            let body = reqwest::Body::wrap_stream(stream);
            http_log!(
                "[HttpWriter::spawn] sending HTTP request: url={url_clone}, method={method_clone:?}, headers={headers_clone:?}"
            );

            let client = Client::new();
            let request = client
                .request(method_clone, url_clone.clone())
                .headers(headers_clone.clone())
                .body(body);

            // Hold the request until the shutdown signal is received
            let response = request.send().await;

            match response {
                Ok(resp) => {
                    http_log!("[HttpWriter::spawn] got response: status={}", resp.status());
                    if !resp.status().is_success() {
                        let _ = err_tx.send(Error::other(format!(
                            "HttpWriter HTTP request failed with non-200 status {}",
                            resp.status()
                        )));
                        return Err(Error::other(format!("HTTP request failed with non-200 status {}", resp.status())));
                    }
                }
                Err(e) => {
                    http_log!("[HttpWriter::spawn] HTTP request error: {e}");
                    let _ = err_tx.send(Error::other(format!("HTTP request failed: {}", e)));
                    return Err(Error::other(format!("HTTP request failed: {}", e)));
                }
            }

            http_log!("[HttpWriter::spawn] HTTP request completed, exiting");
            Ok(())
        });

        http_log!("[HttpWriter::new] connection established successfully");
        Ok(Self {
            url,
            method,
            headers,
            err_rx,
            sender,
            handle,
            finish: false,
        })
    }

    pub fn url(&self) -> &str {
        &self.url
    }

    pub fn method(&self) -> &Method {
        &self.method
    }

    pub fn headers(&self) -> &HeaderMap {
        &self.headers
    }
}

impl AsyncWrite for HttpWriter {
    fn poll_write(mut self: Pin<&mut Self>, _cx: &mut Context<'_>, buf: &[u8]) -> Poll<io::Result<usize>> {
        http_log!(
            "[HttpWriter::poll_write] url: {}, method: {:?}, buf.len: {}",
            self.url,
            self.method,
            buf.len()
        );
        if let Ok(e) = Pin::new(&mut self.err_rx).try_recv() {
            return Poll::Ready(Err(e));
        }

        // try_send never blocks: if the bounded channel (capacity 8) is full, the write
        // fails with an error instead of returning Poll::Pending.
        self.sender
            .try_send(Some(Bytes::copy_from_slice(buf)))
            .map_err(|e| Error::other(format!("HttpWriter send error: {}", e)))?;

        Poll::Ready(Ok(buf.len()))
    }

    fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
        Poll::Ready(Ok(()))
    }

    fn poll_shutdown(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
        if !self.finish {
            http_log!("[HttpWriter::poll_shutdown] url: {}, method: {:?}", self.url, self.method);
            self.sender
                .try_send(None)
                .map_err(|e| Error::other(format!("HttpWriter shutdown error: {}", e)))?;
            http_log!("[HttpWriter::poll_shutdown] sent shutdown signal to HTTP request");

            self.finish = true;
        }
        // Wait for the HTTP request to complete
        use futures::FutureExt;
        match Pin::new(&mut self.get_mut().handle).poll_unpin(_cx) {
            Poll::Ready(Ok(_)) => {
                http_log!("[HttpWriter::poll_shutdown] HTTP request finished successfully");
            }
            Poll::Ready(Err(e)) => {
                http_log!("[HttpWriter::poll_shutdown] HTTP request failed: {e}");
                return Poll::Ready(Err(Error::other(format!("HTTP request failed: {}", e))));
            }
            Poll::Pending => {
                return Poll::Pending;
            }
        }

        Poll::Ready(Ok(()))
    }
}

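// A minimal usage sketch, assuming the same local test endpoint that the commented-out
// tests below rely on: each write is streamed as part of the PUT body, and shutdown()
// sends the end-of-stream marker and waits for the server's final response.
#[allow(dead_code)]
async fn http_writer_usage_sketch() -> io::Result<()> {
    let mut writer = HttpWriter::new(
        "http://127.0.0.1:8081/testfile".to_string(),
        Method::PUT,
        HeaderMap::new(),
    )
    .await?;

    writer.write_all(b"hello from HttpWriter").await?;
    writer.shutdown().await?;
    Ok(())
}
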
// #[cfg(test)]
// mod tests {
//     use super::*;
//     use reqwest::Method;
//     use std::vec;
//     use tokio::io::{AsyncReadExt, AsyncWriteExt};

//     #[tokio::test]
//     async fn test_http_writer_err() {
//         // Use a real local server for integration, or mockito for unit test
//         // Here, we use the Go test server at 127.0.0.1:8081 (scripts/testfile.go)
//         let url = "http://127.0.0.1:8081/testfile".to_string();
//         let data = vec![42u8; 8];

//         // Write
//         // Add the header X-Deny-Write = 1 to simulate a non-writable target
//         let mut headers = HeaderMap::new();
//         headers.insert("X-Deny-Write", "1".parse().unwrap());
//         // Use the PUT method here
//         let writer_result = HttpWriter::new(url.clone(), Method::PUT, headers).await;
//         match writer_result {
//             Ok(mut writer) => {
//                 // If construction succeeds, the write itself should fail
//                 let write_result = writer.write_all(&data).await;
//                 assert!(write_result.is_err(), "write_all should fail when server denies write");
//                 if let Err(e) = write_result {
//                     println!("write_all error: {e}");
//                 }
//                 let shutdown_result = writer.shutdown().await;
//                 if let Err(e) = shutdown_result {
//                     println!("shutdown error: {e}");
//                 }
//             }
//             Err(e) => {
//                 // Failing directly at construction time is also acceptable
//                 println!("HttpWriter::new error: {e}");
//                 assert!(
//                     e.to_string().contains("Empty PUT failed") || e.to_string().contains("Forbidden"),
//                     "unexpected error: {e}"
//                 );
//                 return;
//             }
//         }
//         // Should not reach here
//         panic!("HttpWriter should not allow writing when server denies write");
//     }

//     #[tokio::test]
//     async fn test_http_writer_and_reader_ok() {
//         // Use the local Go test server
//         let url = "http://127.0.0.1:8081/testfile".to_string();
//         let data = vec![99u8; 512 * 1024]; // 512KB of data

//         // Write (without X-Deny-Write)
//         let headers = HeaderMap::new();
//         let mut writer = HttpWriter::new(url.clone(), Method::PUT, headers).await.unwrap();
//         writer.write_all(&data).await.unwrap();
//         writer.shutdown().await.unwrap();

//         http_log!("Wrote {} bytes to {} (ok case)", data.len(), url);

//         // Read back
//         let mut reader = HttpReader::with_capacity(url.clone(), Method::GET, HeaderMap::new(), 8192)
//             .await
//             .unwrap();
//         let mut buf = Vec::new();
//         reader.read_to_end(&mut buf).await.unwrap();
//         assert_eq!(buf, data);

//         // println!("Read {} bytes from {} (ok case)", buf.len(), url);
//         // tokio::time::sleep(std::time::Duration::from_secs(2)).await; // Wait for server to process
//         // println!("[test_http_writer_and_reader_ok] completed successfully");
//     }
// }
103
crates/rio/src/lib.rs
Normal file
@@ -0,0 +1,103 @@
mod limit_reader;
use std::io::Cursor;

pub use limit_reader::LimitReader;

mod etag_reader;
pub use etag_reader::EtagReader;

mod compress_reader;
pub use compress_reader::{CompressReader, DecompressReader};

mod encrypt_reader;
pub use encrypt_reader::{DecryptReader, EncryptReader};

mod hardlimit_reader;
pub use hardlimit_reader::HardLimitReader;

mod hash_reader;
pub use hash_reader::*;

pub mod compress;

pub mod reader;

mod writer;
use tokio::io::{AsyncRead, BufReader};
pub use writer::*;

mod http_reader;
pub use http_reader::*;

mod bitrot;
pub use bitrot::*;

mod etag;

pub trait Reader: tokio::io::AsyncRead + Unpin + Send + Sync + EtagResolvable + HashReaderDetector {}

// Trait for types that can be recursively searched for etag capability
pub trait EtagResolvable {
    fn is_etag_reader(&self) -> bool {
        false
    }
    fn try_resolve_etag(&mut self) -> Option<String> {
        None
    }
}

// Generic function that can work with any EtagResolvable type
pub fn resolve_etag_generic<R>(reader: &mut R) -> Option<String>
where
    R: EtagResolvable,
{
    reader.try_resolve_etag()
}

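// A minimal sketch of how a wrapper type participates in etag resolution: the
// hypothetical `LabelledReader` simply delegates to its inner reader, so
// resolve_etag_generic sees through the wrapper. Cursor uses the default (no-etag)
// implementation further below, hence `None` is expected here.
#[cfg(test)]
mod etag_resolution_sketch {
    use super::{resolve_etag_generic, EtagResolvable};

    struct LabelledReader<R> {
        inner: R,
    }

    impl<R: EtagResolvable> EtagResolvable for LabelledReader<R> {
        fn is_etag_reader(&self) -> bool {
            self.inner.is_etag_reader()
        }
        fn try_resolve_etag(&mut self) -> Option<String> {
            self.inner.try_resolve_etag()
        }
    }

    #[test]
    fn delegates_to_inner_reader() {
        let mut wrapped = LabelledReader {
            inner: std::io::Cursor::new(vec![1u8, 2, 3]),
        };
        assert!(resolve_etag_generic(&mut wrapped).is_none());
    }
}
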
impl<T> EtagResolvable for BufReader<T> where T: AsyncRead + Unpin + Send + Sync {}

impl<T> EtagResolvable for Cursor<T> where T: AsRef<[u8]> + Unpin + Send + Sync {}

impl<T> EtagResolvable for Box<T> where T: EtagResolvable {}

/// Trait to detect and manipulate HashReader instances
pub trait HashReaderDetector {
    fn is_hash_reader(&self) -> bool {
        false
    }

    fn as_hash_reader_mut(&mut self) -> Option<&mut dyn HashReaderMut> {
        None
    }
}

impl<T> HashReaderDetector for tokio::io::BufReader<T> where T: AsyncRead + Unpin + Send + Sync {}

impl<T> HashReaderDetector for std::io::Cursor<T> where T: AsRef<[u8]> + Unpin + Send + Sync {}

impl HashReaderDetector for Box<dyn AsyncRead + Unpin + Send + Sync> {}

impl<T> HashReaderDetector for Box<T> where T: HashReaderDetector {}

// Blanket implementations for Reader trait
impl<T> Reader for tokio::io::BufReader<T> where T: AsyncRead + Unpin + Send + Sync {}

impl<T> Reader for std::io::Cursor<T> where T: AsRef<[u8]> + Unpin + Send + Sync {}

impl<T> Reader for Box<T> where T: Reader {}

// Forward declarations for wrapper types that implement all required traits
impl Reader for crate::HashReader {}

impl Reader for HttpReader {}

impl Reader for crate::HardLimitReader {}
impl Reader for crate::EtagReader {}

impl<R> Reader for crate::EncryptReader<R> where R: Reader {}

impl<R> Reader for crate::DecryptReader<R> where R: Reader {}

impl<R> Reader for crate::CompressReader<R> where R: Reader {}

impl<R> Reader for crate::DecompressReader<R> where R: Reader {}
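// A minimal sketch of consuming the combined Reader trait: the hypothetical `drain`
// helper only requires `R: Reader`, so it works with Cursor, BufReader, HashReader and
// the wrapper readers above without naming them.
#[cfg(test)]
mod reader_trait_sketch {
    use super::Reader;
    use tokio::io::AsyncReadExt;

    async fn drain<R: Reader>(reader: &mut R) -> std::io::Result<Vec<u8>> {
        let mut buf = Vec::new();
        reader.read_to_end(&mut buf).await?;
        Ok(buf)
    }

    #[tokio::test]
    async fn cursor_is_a_reader() {
        let mut cursor = std::io::Cursor::new(b"abc".to_vec());
        assert_eq!(drain(&mut cursor).await.unwrap(), b"abc".to_vec());
    }
}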
188
crates/rio/src/limit_reader.rs
Normal file
@@ -0,0 +1,188 @@
//! LimitReader: a wrapper for AsyncRead that limits the total number of bytes read.
//!
//! # Example
//! ```
//! use tokio::io::{AsyncReadExt, BufReader};
//! use rustfs_rio::LimitReader;
//!
//! #[tokio::main]
//! async fn main() {
//!     let data = b"hello world";
//!     let reader = BufReader::new(&data[..]);
//!     let mut limit_reader = LimitReader::new(reader, data.len() as u64);
//!
//!     let mut buf = Vec::new();
//!     let n = limit_reader.read_to_end(&mut buf).await.unwrap();
//!     assert_eq!(n, data.len());
//!     assert_eq!(&buf, data);
//! }
//! ```

use pin_project_lite::pin_project;
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::{AsyncRead, ReadBuf};

use crate::{EtagResolvable, HashReaderDetector, HashReaderMut, Reader};

pin_project! {
    #[derive(Debug)]
    pub struct LimitReader<R> {
        #[pin]
        pub inner: R,
        limit: u64,
        read: u64,
    }
}

/// A wrapper for AsyncRead that limits the total number of bytes read.
impl<R> LimitReader<R>
where
    R: Reader,
{
    /// Create a new LimitReader wrapping `inner`, with a total read limit of `limit` bytes.
    pub fn new(inner: R, limit: u64) -> Self {
        Self { inner, limit, read: 0 }
    }
}

impl<R> AsyncRead for LimitReader<R>
where
    R: AsyncRead + Unpin + Send + Sync,
{
    fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<std::io::Result<()>> {
        let mut this = self.project();
        let remaining = this.limit.saturating_sub(*this.read);
        if remaining == 0 {
            return Poll::Ready(Ok(()));
        }
        let orig_remaining = buf.remaining();
        let allowed = remaining.min(orig_remaining as u64) as usize;
        if allowed == 0 {
            return Poll::Ready(Ok(()));
        }
        if allowed == orig_remaining {
            // Fast path: the whole destination buffer fits within the remaining limit.
            let before_size = buf.filled().len();
            let poll = this.inner.as_mut().poll_read(cx, buf);
            if let Poll::Ready(Ok(())) = &poll {
                let n = buf.filled().len() - before_size;
                *this.read += n as u64;
            }
            poll
        } else {
            // Limited path: read into a temporary buffer capped at `allowed` bytes.
            let mut temp = vec![0u8; allowed];
            let mut temp_buf = ReadBuf::new(&mut temp);
            let poll = this.inner.as_mut().poll_read(cx, &mut temp_buf);
            if let Poll::Ready(Ok(())) = &poll {
                let n = temp_buf.filled().len();
                buf.put_slice(temp_buf.filled());
                *this.read += n as u64;
            }
            poll
        }
    }
}

impl<R> EtagResolvable for LimitReader<R>
where
    R: EtagResolvable,
{
    fn try_resolve_etag(&mut self) -> Option<String> {
        self.inner.try_resolve_etag()
    }
}

impl<R> HashReaderDetector for LimitReader<R>
where
    R: HashReaderDetector,
{
    fn is_hash_reader(&self) -> bool {
        self.inner.is_hash_reader()
    }
    fn as_hash_reader_mut(&mut self) -> Option<&mut dyn HashReaderMut> {
        self.inner.as_hash_reader_mut()
    }
}

#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;
    use tokio::io::{AsyncReadExt, BufReader};

    #[tokio::test]
    async fn test_limit_reader_exact() {
        let data = b"hello world";
        let reader = BufReader::new(&data[..]);
        let mut limit_reader = LimitReader::new(reader, data.len() as u64);

        let mut buf = Vec::new();
        let n = limit_reader.read_to_end(&mut buf).await.unwrap();
        assert_eq!(n, data.len());
        assert_eq!(&buf, data);
    }

    #[tokio::test]
    async fn test_limit_reader_less_than_data() {
        let data = b"hello world";
        let reader = BufReader::new(&data[..]);
        let mut limit_reader = LimitReader::new(reader, 5);

        let mut buf = Vec::new();
        let n = limit_reader.read_to_end(&mut buf).await.unwrap();
        assert_eq!(n, 5);
        assert_eq!(&buf, b"hello");
    }

    #[tokio::test]
    async fn test_limit_reader_zero() {
        let data = b"hello world";
        let reader = BufReader::new(&data[..]);
        let mut limit_reader = LimitReader::new(reader, 0);

        let mut buf = Vec::new();
        let n = limit_reader.read_to_end(&mut buf).await.unwrap();
        assert_eq!(n, 0);
        assert!(buf.is_empty());
    }

    #[tokio::test]
    async fn test_limit_reader_multiple_reads() {
        let data = b"abcdefghij";
        let reader = BufReader::new(&data[..]);
        let mut limit_reader = LimitReader::new(reader, 7);

        let mut buf1 = [0u8; 3];
        let n1 = limit_reader.read(&mut buf1).await.unwrap();
        assert_eq!(n1, 3);
        assert_eq!(&buf1, b"abc");

        let mut buf2 = [0u8; 5];
        let n2 = limit_reader.read(&mut buf2).await.unwrap();
        assert_eq!(n2, 4);
        assert_eq!(&buf2[..n2], b"defg");

        let mut buf3 = [0u8; 2];
        let n3 = limit_reader.read(&mut buf3).await.unwrap();
        assert_eq!(n3, 0);
    }

    #[tokio::test]
    async fn test_limit_reader_large_file() {
        use rand::Rng;
        // Generate a 3MB random byte array for testing
        let size = 3 * 1024 * 1024;
        let mut data = vec![0u8; size];
        rand::thread_rng().fill(&mut data[..]);
        let reader = Cursor::new(data.clone());
        let mut limit_reader = LimitReader::new(reader, size as u64);

        // Read data into buffer
        let mut buf = Vec::new();
        let n = limit_reader.read_to_end(&mut buf).await.unwrap();
        assert_eq!(n, size);
        assert_eq!(buf.len(), size);
        assert_eq!(&buf, &data);
    }
}
1
crates/rio/src/reader.rs
Normal file
@@ -0,0 +1 @@

168
crates/rio/src/writer.rs
Normal file
@@ -0,0 +1,168 @@
use std::io::Cursor;
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::AsyncWrite;
use tokio::io::AsyncWriteExt;

use crate::HttpWriter;

pub enum Writer {
    Cursor(Cursor<Vec<u8>>),
    Http(HttpWriter),
    Other(Box<dyn AsyncWrite + Unpin + Send + Sync>),
}

impl Writer {
    /// Create a Writer::Other from any AsyncWrite + Unpin + Send + Sync type.
    pub fn from_tokio_writer<W>(w: W) -> Self
    where
        W: AsyncWrite + Unpin + Send + Sync + 'static,
    {
        Writer::Other(Box::new(w))
    }

    pub fn from_cursor(w: Cursor<Vec<u8>>) -> Self {
        Writer::Cursor(w)
    }

    pub fn from_http(w: HttpWriter) -> Self {
        Writer::Http(w)
    }

    pub fn into_cursor_inner(self) -> Option<Vec<u8>> {
        match self {
            Writer::Cursor(w) => Some(w.into_inner()),
            _ => None,
        }
    }

    pub fn as_cursor(&mut self) -> Option<&mut Cursor<Vec<u8>>> {
        match self {
            Writer::Cursor(w) => Some(w),
            _ => None,
        }
    }

    pub fn as_http(&mut self) -> Option<&mut HttpWriter> {
        match self {
            Writer::Http(w) => Some(w),
            _ => None,
        }
    }

    pub fn into_http(self) -> Option<HttpWriter> {
        match self {
            Writer::Http(w) => Some(w),
            _ => None,
        }
    }

    pub fn into_cursor(self) -> Option<Cursor<Vec<u8>>> {
        match self {
            Writer::Cursor(w) => Some(w),
            _ => None,
        }
    }
}

impl AsyncWrite for Writer {
    fn poll_write(
        self: std::pin::Pin<&mut Self>,
        cx: &mut std::task::Context<'_>,
        buf: &[u8],
    ) -> std::task::Poll<std::io::Result<usize>> {
        match self.get_mut() {
            Writer::Cursor(w) => Pin::new(w).poll_write(cx, buf),
            Writer::Http(w) => Pin::new(w).poll_write(cx, buf),
            Writer::Other(w) => Pin::new(w.as_mut()).poll_write(cx, buf),
        }
    }

    fn poll_flush(self: std::pin::Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> std::task::Poll<std::io::Result<()>> {
        match self.get_mut() {
            Writer::Cursor(w) => Pin::new(w).poll_flush(cx),
            Writer::Http(w) => Pin::new(w).poll_flush(cx),
            Writer::Other(w) => Pin::new(w.as_mut()).poll_flush(cx),
        }
    }

    fn poll_shutdown(self: std::pin::Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> std::task::Poll<std::io::Result<()>> {
        match self.get_mut() {
            Writer::Cursor(w) => Pin::new(w).poll_shutdown(cx),
            Writer::Http(w) => Pin::new(w).poll_shutdown(cx),
            Writer::Other(w) => Pin::new(w.as_mut()).poll_shutdown(cx),
        }
    }
}

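// A minimal sketch of the in-memory variant: write through the AsyncWrite interface,
// then recover the accumulated bytes with into_cursor_inner().
#[cfg(test)]
mod writer_cursor_sketch {
    use super::Writer;
    use std::io::Cursor;
    use tokio::io::AsyncWriteExt;

    #[tokio::test]
    async fn cursor_round_trip() {
        let mut writer = Writer::from_cursor(Cursor::new(Vec::new()));
        writer.write_all(b"hello").await.unwrap();
        writer.shutdown().await.unwrap();
        assert_eq!(writer.into_cursor_inner(), Some(b"hello".to_vec()));
    }
}
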
/// WriterAll wraps a Writer and ensures each write writes the entire buffer (like write_all).
pub struct WriterAll<W: AsyncWrite + Unpin> {
    inner: W,
}

impl<W: AsyncWrite + Unpin> WriterAll<W> {
    pub fn new(inner: W) -> Self {
        Self { inner }
    }

    /// Write the entire buffer, like write_all.
    pub async fn write_all(&mut self, mut buf: &[u8]) -> std::io::Result<()> {
        while !buf.is_empty() {
            let n = self.inner.write(buf).await?;
            if n == 0 {
                return Err(std::io::Error::new(std::io::ErrorKind::WriteZero, "failed to write whole buffer"));
            }
            buf = &buf[n..];
        }
        Ok(())
    }

    /// Get a mutable reference to the inner writer.
    pub fn get_mut(&mut self) -> &mut W {
        &mut self.inner
    }
}

impl<W: AsyncWrite + Unpin> AsyncWrite for WriterAll<W> {
    fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context<'_>, mut buf: &[u8]) -> Poll<std::io::Result<usize>> {
        // Keep polling the inner writer until the whole buffer has been accepted; if the
        // inner writer returns Pending, errors, or writes zero bytes after a partial
        // write, report the bytes written so far instead.
        let mut total_written = 0;
        while !buf.is_empty() {
            // Safety: W: Unpin
            let inner_pin = Pin::new(&mut self.inner);
            match inner_pin.poll_write(cx, buf) {
                Poll::Ready(Ok(0)) => {
                    if total_written == 0 {
                        return Poll::Ready(Ok(0));
                    } else {
                        return Poll::Ready(Ok(total_written));
                    }
                }
                Poll::Ready(Ok(n)) => {
                    total_written += n;
                    buf = &buf[n..];
                }
                Poll::Ready(Err(e)) => {
                    if total_written == 0 {
                        return Poll::Ready(Err(e));
                    } else {
                        return Poll::Ready(Ok(total_written));
                    }
                }
                Poll::Pending => {
                    if total_written == 0 {
                        return Poll::Pending;
                    } else {
                        return Poll::Ready(Ok(total_written));
                    }
                }
            }
        }
        Poll::Ready(Ok(total_written))
    }

    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
        Pin::new(&mut self.inner).poll_flush(cx)
    }

    fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
        Pin::new(&mut self.inner).poll_shutdown(cx)
    }
}
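// A minimal sketch of WriterAll: a single write() call keeps driving the inner writer
// until the whole buffer is accepted, mirroring write_all semantics.
#[cfg(test)]
mod writer_all_sketch {
    use super::WriterAll;
    use std::io::Cursor;
    use tokio::io::AsyncWriteExt;

    #[tokio::test]
    async fn writes_whole_buffer() {
        let mut writer = WriterAll::new(Cursor::new(Vec::new()));
        let n = writer.write(b"abcdef").await.unwrap();
        assert_eq!(n, 6);
        assert_eq!(writer.get_mut().get_ref(), &b"abcdef".to_vec());
    }
}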