Merge pull request #480 from rustfs/feat/compress

feat: add object compression support
Author: loverustfs
Date: 2025-06-17 20:20:45 +08:00
Committed by: GitHub
47 changed files with 1708 additions and 512 deletions
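The PR wires object compression through a pair of streaming readers in `rustfs_rio`. A minimal end-to-end sketch, mirroring the tests added in the diffs below (crate paths follow the docs shown there; `DecompressReader` being re-exported at the crate root is an assumption):

```rust
use std::io::Cursor;

use rustfs_rio::{CompressReader, DecompressReader, WarpReader};
use rustfs_utils::compress::CompressionAlgorithm;
use tokio::io::AsyncReadExt;

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let data = b"hello world, hello world, hello world!".to_vec();

    // Compress while reading: the reader emits framed blocks
    // (8-byte header + uvarint uncompressed length + compressed payload).
    let mut compressor =
        CompressReader::new(WarpReader::new(Cursor::new(data.clone())), CompressionAlgorithm::Gzip);
    let mut compressed = Vec::new();
    compressor.read_to_end(&mut compressed).await?;

    // Decompress the framed stream back to the original bytes.
    let mut decompressor = DecompressReader::new(Cursor::new(compressed), CompressionAlgorithm::Gzip);
    let mut decompressed = Vec::new();
    decompressor.read_to_end(&mut decompressed).await?;

    assert_eq!(decompressed, data);
    Ok(())
}
```

CompressReader also records an Index (see compress_index.rs below) of compressed/uncompressed offset pairs via TryGetIndex, which is what later enables ranged reads without decompressing from the start.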

Cargo.lock generated
View File

@@ -3666,6 +3666,7 @@ dependencies = [
"shadow-rs",
"siphasher 1.0.1",
"smallvec",
"temp-env",
"tempfile",
"thiserror 2.0.12",
"time",
@@ -8435,25 +8436,23 @@ dependencies = [
"aes-gcm",
"async-trait",
"base64-simd",
"brotli 8.0.1",
"byteorder",
"bytes",
"crc32fast",
"criterion",
"flate2",
"futures",
"hex-simd",
"http 1.3.1",
"lz4",
"md-5",
"pin-project-lite",
"rand 0.9.1",
"reqwest",
"rustfs-utils",
"snap",
"serde",
"serde_json",
"tokio",
"tokio-test",
"tokio-util",
"zstd",
]
[[package]]
@@ -8489,14 +8488,18 @@ version = "0.0.1"
dependencies = [
"base64-simd",
"blake3",
"brotli 8.0.1",
"crc32fast",
"flate2",
"hex-simd",
"highway",
"lazy_static",
"local-ip-address",
"lz4",
"md-5",
"netif",
"nix 0.30.1",
"rand 0.9.1",
"regex",
"rustfs-config",
"rustls 0.23.27",
@@ -8505,11 +8508,13 @@ dependencies = [
"serde",
"sha2 0.10.9",
"siphasher 1.0.1",
"snap",
"tempfile",
"tokio",
"tracing",
"url",
"winapi",
"zstd",
]
[[package]]
@@ -9739,6 +9744,15 @@ version = "0.12.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
[[package]]
name = "temp-env"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45107136c2ddf8c4b87453c02294fd0adf41751796e81e8ba3f7fd951977ab57"
dependencies = [
"once_cell",
]
[[package]]
name = "tempfile"
version = "3.20.0"

View File

@@ -157,6 +157,11 @@ prost = "0.13.5"
prost-build = "0.13.5"
protobuf = "3.7"
rand = "0.9.1"
brotli = "8.0.1"
flate2 = "1.1.1"
zstd = "0.13.3"
lz4 = "1.28.1"
snap = "1.1.1"
rdkafka = { version = "0.37.0", features = ["tokio"] }
reed-solomon-erasure = { version = "6.0.0", features = ["simd-accel"] }
reed-solomon-simd = { version = "3.0.0" }

View File

@@ -85,6 +85,11 @@ build-musl:
@echo "🔨 Building rustfs for x86_64-unknown-linux-musl..."
cargo build --target x86_64-unknown-linux-musl --bin rustfs -r
.PHONY: build-gnu
build-gnu:
@echo "🔨 Building rustfs for x86_64-unknown-linux-gnu..."
cargo build --target x86_64-unknown-linux-gnu --bin rustfs -r
.PHONY: deploy-dev
deploy-dev: build-musl
@echo "🚀 Deploying to dev server: $${IP}"

View File

@@ -1,5 +1,6 @@
use crate::error::{Error, Result};
use crate::headers::RESERVED_METADATA_PREFIX_LOWER;
use crate::headers::RUSTFS_HEALING;
use bytes::Bytes;
use rmp_serde::Serializer;
use rustfs_utils::HashAlgorithm;
@@ -9,9 +10,6 @@ use std::collections::HashMap;
use time::OffsetDateTime;
use uuid::Uuid;
use crate::headers::RESERVED_METADATA_PREFIX;
use crate::headers::RUSTFS_HEALING;
pub const ERASURE_ALGORITHM: &str = "rs-vandermonde";
pub const BLOCK_SIZE_V2: usize = 1024 * 1024; // 1M
@@ -24,10 +22,10 @@ pub struct ObjectPartInfo {
pub etag: String,
pub number: usize,
pub size: usize,
pub actual_size: usize, // Original data size
pub actual_size: i64, // Original data size
pub mod_time: Option<OffsetDateTime>,
// Index holds the index of the part in the erasure coding
pub index: Option<Vec<u8>>,
pub index: Option<Bytes>,
// Checksums holds checksums of the part
pub checksums: Option<HashMap<String, String>>,
}
@@ -118,15 +116,21 @@ impl ErasureInfo {
}
/// Calculate the total erasure file size for a given original size.
// Returns the final erasure size from the original size
pub fn shard_file_size(&self, total_length: usize) -> usize {
pub fn shard_file_size(&self, total_length: i64) -> i64 {
if total_length == 0 {
return 0;
}
if total_length < 0 {
return total_length;
}
let total_length = total_length as usize;
let num_shards = total_length / self.block_size;
let last_block_size = total_length % self.block_size;
let last_shard_size = calc_shard_size(last_block_size, self.data_blocks);
num_shards * self.shard_size() + last_shard_size
(num_shards * self.shard_size() + last_shard_size) as i64
}
/// Check if this ErasureInfo equals another ErasureInfo
@@ -156,7 +160,7 @@ pub struct FileInfo {
pub expire_restored: bool,
pub data_dir: Option<Uuid>,
pub mod_time: Option<OffsetDateTime>,
pub size: usize,
pub size: i64,
// File mode bits
pub mode: Option<u32>,
// WrittenByVersion is the unix time stamp of the version that created this version of the object
@@ -255,7 +259,8 @@ impl FileInfo {
etag: String,
part_size: usize,
mod_time: Option<OffsetDateTime>,
actual_size: usize,
actual_size: i64,
index: Option<Bytes>,
) {
let part = ObjectPartInfo {
etag,
@@ -263,7 +268,7 @@ impl FileInfo {
size: part_size,
mod_time,
actual_size,
index: None,
index,
checksums: None,
};
@@ -306,6 +311,12 @@ impl FileInfo {
self.metadata
.insert(format!("{}inline-data", RESERVED_METADATA_PREFIX_LOWER).to_owned(), "true".to_owned());
}
pub fn set_data_moved(&mut self) {
self.metadata
.insert(format!("{}data-moved", RESERVED_METADATA_PREFIX_LOWER).to_owned(), "true".to_owned());
}
pub fn inline_data(&self) -> bool {
self.metadata
.contains_key(format!("{}inline-data", RESERVED_METADATA_PREFIX_LOWER).as_str())
@@ -315,7 +326,7 @@ impl FileInfo {
/// Check if the object is compressed
pub fn is_compressed(&self) -> bool {
self.metadata
.contains_key(&format!("{}compression", RESERVED_METADATA_PREFIX))
.contains_key(&format!("{}compression", RESERVED_METADATA_PREFIX_LOWER))
}
/// Check if the object is remote (transitioned to another tier)
@@ -429,7 +440,7 @@ impl FileInfoVersions {
}
/// Calculate the total size of all versions for this object
pub fn size(&self) -> usize {
pub fn size(&self) -> i64 {
self.versions.iter().map(|v| v.size).sum()
}
}
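Note that the size fields in this file move from `usize` to `i64`, so an unknown size (negative, e.g. for compressed or streaming uploads) can flow through unchanged; `shard_file_size` above now passes such values straight back. A standalone sketch of the size math, under the assumption that `calc_shard_size` and `shard_size()` are ceiling divisions of the block size over `data_blocks` (those helpers are not shown in this hunk):

```rust
// Hypothetical mirror of ErasureInfo::shard_file_size for illustration only;
// it assumes calc_shard_size(n, data_blocks) == ceil(n / data_blocks).
fn calc_shard_size(block_size: usize, data_blocks: usize) -> usize {
    block_size.div_ceil(data_blocks)
}

fn shard_file_size(total_length: i64, block_size: usize, data_blocks: usize) -> i64 {
    if total_length <= 0 {
        // 0 stays 0; negative (unknown) sizes are passed through untouched
        return total_length;
    }
    let total_length = total_length as usize;
    let num_shards = total_length / block_size;
    let last_block_size = total_length % block_size;
    let shard_size = calc_shard_size(block_size, data_blocks);
    let last_shard_size = calc_shard_size(last_block_size, data_blocks);
    (num_shards * shard_size + last_shard_size) as i64
}

fn main() {
    // 3 MiB object, 1 MiB blocks, 4 data blocks: 3 shards of 256 KiB each
    assert_eq!(shard_file_size(3 << 20, 1 << 20, 4), 3 * (1 << 18));
    // Unknown size is preserved
    assert_eq!(shard_file_size(-1, 1 << 20, 4), -1);
}
```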

View File

@@ -6,6 +6,7 @@ use crate::headers::{
RESERVED_METADATA_PREFIX_LOWER, VERSION_PURGE_STATUS_KEY,
};
use byteorder::ByteOrder;
use bytes::Bytes;
use rmp::Marker;
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
@@ -1379,9 +1380,9 @@ pub struct MetaObject {
pub part_numbers: Vec<usize>, // Part Numbers
pub part_etags: Vec<String>, // Part ETags
pub part_sizes: Vec<usize>, // Part Sizes
pub part_actual_sizes: Vec<usize>, // Part ActualSizes (compression)
pub part_indices: Vec<Vec<u8>>, // Part Indexes (compression)
pub size: usize, // Object version size
pub part_actual_sizes: Vec<i64>, // Part ActualSizes (compression)
pub part_indices: Vec<Bytes>, // Part Indexes (compression)
pub size: i64, // Object version size
pub mod_time: Option<OffsetDateTime>, // Object version modified time
pub meta_sys: HashMap<String, Vec<u8>>, // Object version internal metadata
pub meta_user: HashMap<String, String>, // Object version metadata set by user
@@ -1538,7 +1539,7 @@ impl MetaObject {
let mut buf = vec![0u8; blen as usize];
cur.read_exact(&mut buf)?;
indices.push(buf);
indices.push(Bytes::from(buf));
}
self.part_indices = indices;
@@ -1810,13 +1811,16 @@ impl MetaObject {
}
for (k, v) in &self.meta_sys {
if k == AMZ_STORAGE_CLASS && v == b"STANDARD" {
continue;
}
if k.starts_with(RESERVED_METADATA_PREFIX)
|| k.starts_with(RESERVED_METADATA_PREFIX_LOWER)
|| k == VERSION_PURGE_STATUS_KEY
{
continue;
metadata.insert(k.to_owned(), String::from_utf8(v.to_owned()).unwrap_or_default());
}
metadata.insert(k.to_owned(), String::from_utf8(v.to_owned()).unwrap_or_default());
}
// todo: ReplicationState,Delete
@@ -2799,13 +2803,13 @@ mod test {
// 2. Test extremely large file sizes
let large_object = MetaObject {
size: usize::MAX,
size: i64::MAX,
part_sizes: vec![usize::MAX],
..Default::default()
};
// Should be able to handle large values
assert_eq!(large_object.size, usize::MAX);
assert_eq!(large_object.size, i64::MAX);
}
#[tokio::test]
@@ -3367,7 +3371,7 @@ pub struct DetailedVersionStats {
pub free_versions: usize,
pub versions_with_data_dir: usize,
pub versions_with_inline_data: usize,
pub total_size: usize,
pub total_size: i64,
pub latest_mod_time: Option<OffsetDateTime>,
}

View File

@@ -19,3 +19,5 @@ pub const X_RUSTFS_DATA_MOV: &str = "X-Rustfs-Internal-data-mov";
pub const AMZ_OBJECT_TAGGING: &str = "X-Amz-Tagging";
pub const AMZ_BUCKET_REPLICATION_STATUS: &str = "X-Amz-Replication-Status";
pub const AMZ_DECODED_CONTENT_LENGTH: &str = "X-Amz-Decoded-Content-Length";
pub const RUSTFS_DATA_MOVE: &str = "X-Rustfs-Internal-data-mov";

View File

@@ -91,7 +91,7 @@ pub fn create_complex_xlmeta() -> Result<Vec<u8>> {
let mut fm = FileMeta::new();
// Create an object with 10 versions
for i in 0..10 {
for i in 0i64..10i64 {
let version_id = Uuid::new_v4();
let data_dir = if i % 3 == 0 { Some(Uuid::new_v4()) } else { None };
@@ -113,9 +113,9 @@ pub fn create_complex_xlmeta() -> Result<Vec<u8>> {
part_numbers: vec![1],
part_etags: vec![format!("etag-{:08x}", i)],
part_sizes: vec![1024 * (i + 1) as usize],
part_actual_sizes: vec![1024 * (i + 1) as usize],
part_actual_sizes: vec![1024 * (i + 1)],
part_indices: Vec::new(),
size: 1024 * (i + 1) as usize,
size: 1024 * (i + 1),
mod_time: Some(OffsetDateTime::from_unix_timestamp(1705312200 + i * 60)?),
meta_sys: HashMap::new(),
meta_user: metadata,
@@ -221,7 +221,7 @@ pub fn create_xlmeta_with_inline_data() -> Result<Vec<u8>> {
part_sizes: vec![inline_data.len()],
part_actual_sizes: Vec::new(),
part_indices: Vec::new(),
size: inline_data.len(),
size: inline_data.len() as i64,
mod_time: Some(OffsetDateTime::now_utc()),
meta_sys: HashMap::new(),
meta_user: HashMap::new(),

View File

@@ -14,23 +14,20 @@ tokio = { workspace = true, features = ["full"] }
rand = { workspace = true }
md-5 = { workspace = true }
http.workspace = true
flate2 = "1.1.1"
aes-gcm = "0.10.3"
crc32fast = "1.4.2"
pin-project-lite.workspace = true
async-trait.workspace = true
base64-simd = "0.8.0"
hex-simd = "0.8.0"
zstd = "0.13.3"
lz4 = "1.28.1"
brotli = "8.0.1"
snap = "1.1.1"
serde = { workspace = true }
bytes.workspace = true
reqwest.workspace = true
tokio-util.workspace = true
futures.workspace = true
rustfs-utils = {workspace = true, features= ["io","hash"]}
rustfs-utils = {workspace = true, features= ["io","hash","compress"]}
byteorder.workspace = true
serde_json.workspace = true
[dev-dependencies]
criterion = { version = "0.5.1", features = ["async", "async_tokio", "tokio"] }

View File

@@ -0,0 +1,672 @@
use bytes::Bytes;
use serde::{Deserialize, Serialize};
use std::io::{self, Read, Seek, SeekFrom};
const S2_INDEX_HEADER: &[u8] = b"s2idx\x00";
const S2_INDEX_TRAILER: &[u8] = b"\x00xdi2s";
const MAX_INDEX_ENTRIES: usize = 1 << 16;
const MIN_INDEX_DIST: i64 = 1 << 20;
// const MIN_INDEX_DIST: i64 = 0;
pub trait TryGetIndex {
fn try_get_index(&self) -> Option<&Index> {
None
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Index {
pub total_uncompressed: i64,
pub total_compressed: i64,
info: Vec<IndexInfo>,
est_block_uncomp: i64,
}
impl Default for Index {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexInfo {
pub compressed_offset: i64,
pub uncompressed_offset: i64,
}
#[allow(dead_code)]
impl Index {
pub fn new() -> Self {
Self {
total_uncompressed: -1,
total_compressed: -1,
info: Vec::new(),
est_block_uncomp: 0,
}
}
#[allow(dead_code)]
fn reset(&mut self, max_block: usize) {
self.est_block_uncomp = max_block as i64;
self.total_compressed = -1;
self.total_uncompressed = -1;
self.info.clear();
}
pub fn len(&self) -> usize {
self.info.len()
}
fn alloc_infos(&mut self, n: usize) {
if n > MAX_INDEX_ENTRIES {
panic!("n > MAX_INDEX_ENTRIES");
}
self.info = Vec::with_capacity(n);
}
pub fn add(&mut self, compressed_offset: i64, uncompressed_offset: i64) -> io::Result<()> {
if self.info.is_empty() {
self.info.push(IndexInfo {
compressed_offset,
uncompressed_offset,
});
return Ok(());
}
let last_idx = self.info.len() - 1;
let latest = &mut self.info[last_idx];
if latest.uncompressed_offset == uncompressed_offset {
latest.compressed_offset = compressed_offset;
return Ok(());
}
if latest.uncompressed_offset > uncompressed_offset {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"internal error: Earlier uncompressed received ({} > {})",
latest.uncompressed_offset, uncompressed_offset
),
));
}
if latest.compressed_offset > compressed_offset {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"internal error: Earlier compressed received ({} > {})",
latest.compressed_offset, compressed_offset
),
));
}
if latest.uncompressed_offset + MIN_INDEX_DIST > uncompressed_offset {
return Ok(());
}
self.info.push(IndexInfo {
compressed_offset,
uncompressed_offset,
});
self.total_compressed = compressed_offset;
self.total_uncompressed = uncompressed_offset;
Ok(())
}
pub fn find(&self, offset: i64) -> io::Result<(i64, i64)> {
if self.total_uncompressed < 0 {
return Err(io::Error::other("corrupt index"));
}
let mut offset = offset;
if offset < 0 {
offset += self.total_uncompressed;
if offset < 0 {
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "offset out of bounds"));
}
}
if offset > self.total_uncompressed {
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "offset out of bounds"));
}
if self.info.is_empty() {
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "empty index"));
}
if self.info.len() > 200 {
let n = self
.info
.binary_search_by(|info| {
if info.uncompressed_offset > offset {
std::cmp::Ordering::Greater
} else {
std::cmp::Ordering::Less
}
})
.unwrap_or_else(|i| i);
if n == 0 {
return Ok((self.info[0].compressed_offset, self.info[0].uncompressed_offset));
}
return Ok((self.info[n - 1].compressed_offset, self.info[n - 1].uncompressed_offset));
}
let mut compressed_off = 0;
let mut uncompressed_off = 0;
for info in &self.info {
if info.uncompressed_offset > offset {
break;
}
compressed_off = info.compressed_offset;
uncompressed_off = info.uncompressed_offset;
}
Ok((compressed_off, uncompressed_off))
}
fn reduce(&mut self) {
if self.info.len() < MAX_INDEX_ENTRIES && self.est_block_uncomp >= MIN_INDEX_DIST {
return;
}
let mut remove_n = (self.info.len() + 1) / MAX_INDEX_ENTRIES;
let src = self.info.clone();
let mut j = 0;
while self.est_block_uncomp * (remove_n as i64 + 1) < MIN_INDEX_DIST && self.info.len() / (remove_n + 1) > 1000 {
remove_n += 1;
}
let mut idx = 0;
while idx < src.len() {
self.info[j] = src[idx].clone();
j += 1;
idx += remove_n + 1;
}
self.info.truncate(j);
self.est_block_uncomp += self.est_block_uncomp * remove_n as i64;
}
pub fn into_vec(mut self) -> Bytes {
let mut b = Vec::new();
self.append_to(&mut b, self.total_uncompressed, self.total_compressed);
Bytes::from(b)
}
pub fn append_to(&mut self, b: &mut Vec<u8>, uncomp_total: i64, comp_total: i64) {
self.reduce();
let init_size = b.len();
// Add skippable header
b.extend_from_slice(&[0x50, 0x2A, 0x4D, 0x18]); // ChunkTypeIndex
b.extend_from_slice(&[0, 0, 0]); // Placeholder for chunk length
// Add header
b.extend_from_slice(S2_INDEX_HEADER);
// Add total sizes
let mut tmp = [0u8; 8];
let n = write_varint(&mut tmp, uncomp_total);
b.extend_from_slice(&tmp[..n]);
let n = write_varint(&mut tmp, comp_total);
b.extend_from_slice(&tmp[..n]);
let n = write_varint(&mut tmp, self.est_block_uncomp);
b.extend_from_slice(&tmp[..n]);
let n = write_varint(&mut tmp, self.info.len() as i64);
b.extend_from_slice(&tmp[..n]);
// Check if we should add uncompressed offsets
let mut has_uncompressed = 0u8;
for (idx, info) in self.info.iter().enumerate() {
if idx == 0 {
if info.uncompressed_offset != 0 {
has_uncompressed = 1;
break;
}
continue;
}
if info.uncompressed_offset != self.info[idx - 1].uncompressed_offset + self.est_block_uncomp {
has_uncompressed = 1;
break;
}
}
b.push(has_uncompressed);
// Add uncompressed offsets if needed
if has_uncompressed == 1 {
for (idx, info) in self.info.iter().enumerate() {
let mut u_off = info.uncompressed_offset;
if idx > 0 {
let prev = &self.info[idx - 1];
u_off -= prev.uncompressed_offset + self.est_block_uncomp;
}
let n = write_varint(&mut tmp, u_off);
b.extend_from_slice(&tmp[..n]);
}
}
// Add compressed offsets
let mut c_predict = self.est_block_uncomp / 2;
for (idx, info) in self.info.iter().enumerate() {
let mut c_off = info.compressed_offset;
if idx > 0 {
let prev = &self.info[idx - 1];
c_off -= prev.compressed_offset + c_predict;
c_predict += c_off / 2;
}
let n = write_varint(&mut tmp, c_off);
b.extend_from_slice(&tmp[..n]);
}
// Add total size and trailer
let total_size = (b.len() - init_size + 4 + S2_INDEX_TRAILER.len()) as u32;
b.extend_from_slice(&total_size.to_le_bytes());
b.extend_from_slice(S2_INDEX_TRAILER);
// Update chunk length
let chunk_len = b.len() - init_size - 4;
b[init_size + 1] = chunk_len as u8;
b[init_size + 2] = (chunk_len >> 8) as u8;
b[init_size + 3] = (chunk_len >> 16) as u8;
}
pub fn load<'a>(&mut self, mut b: &'a [u8]) -> io::Result<&'a [u8]> {
if b.len() <= 4 + S2_INDEX_HEADER.len() + S2_INDEX_TRAILER.len() {
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "buffer too small"));
}
if b[0] != 0x50 || b[1] != 0x2A || b[2] != 0x4D || b[3] != 0x18 {
return Err(io::Error::other("invalid chunk type"));
}
let chunk_len = (b[1] as usize) | ((b[2] as usize) << 8) | ((b[3] as usize) << 16);
b = &b[4..];
if b.len() < chunk_len {
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "buffer too small"));
}
if !b.starts_with(S2_INDEX_HEADER) {
return Err(io::Error::other("invalid header"));
}
b = &b[S2_INDEX_HEADER.len()..];
// Read total uncompressed
let (v, n) = read_varint(b)?;
if v < 0 {
return Err(io::Error::other("invalid uncompressed size"));
}
self.total_uncompressed = v;
b = &b[n..];
// Read total compressed
let (v, n) = read_varint(b)?;
if v < 0 {
return Err(io::Error::other("invalid compressed size"));
}
self.total_compressed = v;
b = &b[n..];
// Read est block uncomp
let (v, n) = read_varint(b)?;
if v < 0 {
return Err(io::Error::other("invalid block size"));
}
self.est_block_uncomp = v;
b = &b[n..];
// Read number of entries
let (v, n) = read_varint(b)?;
if v < 0 || v > MAX_INDEX_ENTRIES as i64 {
return Err(io::Error::other("invalid number of entries"));
}
let entries = v as usize;
b = &b[n..];
self.alloc_infos(entries);
if b.is_empty() {
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "buffer too small"));
}
let has_uncompressed = b[0];
b = &b[1..];
if has_uncompressed & 1 != has_uncompressed {
return Err(io::Error::other("invalid uncompressed flag"));
}
// Read uncompressed offsets
for idx in 0..entries {
let mut u_off = 0i64;
if has_uncompressed != 0 {
let (v, n) = read_varint(b)?;
u_off = v;
b = &b[n..];
}
if idx > 0 {
let prev = self.info[idx - 1].uncompressed_offset;
u_off += prev + self.est_block_uncomp;
if u_off <= prev {
return Err(io::Error::other("invalid offset"));
}
}
if u_off < 0 {
return Err(io::Error::other("negative offset"));
}
self.info[idx].uncompressed_offset = u_off;
}
// Read compressed offsets
let mut c_predict = self.est_block_uncomp / 2;
for idx in 0..entries {
let (v, n) = read_varint(b)?;
let mut c_off = v;
b = &b[n..];
if idx > 0 {
c_predict += c_off / 2;
let prev = self.info[idx - 1].compressed_offset;
c_off += prev + c_predict;
if c_off <= prev {
return Err(io::Error::other("invalid offset"));
}
}
if c_off < 0 {
return Err(io::Error::other("negative offset"));
}
self.info[idx].compressed_offset = c_off;
}
if b.len() < 4 + S2_INDEX_TRAILER.len() {
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "buffer too small"));
}
// Skip size
b = &b[4..];
// Check trailer
if !b.starts_with(S2_INDEX_TRAILER) {
return Err(io::Error::other("invalid trailer"));
}
Ok(&b[S2_INDEX_TRAILER.len()..])
}
pub fn load_stream<R: Read + Seek>(&mut self, mut rs: R) -> io::Result<()> {
// Go to end
rs.seek(SeekFrom::End(-10))?;
let mut tmp = [0u8; 10];
rs.read_exact(&mut tmp)?;
// Check trailer
if &tmp[4..4 + S2_INDEX_TRAILER.len()] != S2_INDEX_TRAILER {
return Err(io::Error::other("invalid trailer"));
}
let sz = u32::from_le_bytes(tmp[..4].try_into().unwrap());
if sz > 0x7fffffff {
return Err(io::Error::other("size too large"));
}
rs.seek(SeekFrom::End(-(sz as i64)))?;
let mut buf = vec![0u8; sz as usize];
rs.read_exact(&mut buf)?;
self.load(&buf)?;
Ok(())
}
pub fn to_json(&self) -> serde_json::Result<Vec<u8>> {
#[derive(Serialize)]
struct Offset {
compressed: i64,
uncompressed: i64,
}
#[derive(Serialize)]
struct IndexJson {
total_uncompressed: i64,
total_compressed: i64,
offsets: Vec<Offset>,
est_block_uncompressed: i64,
}
let json = IndexJson {
total_uncompressed: self.total_uncompressed,
total_compressed: self.total_compressed,
offsets: self
.info
.iter()
.map(|info| Offset {
compressed: info.compressed_offset,
uncompressed: info.uncompressed_offset,
})
.collect(),
est_block_uncompressed: self.est_block_uncomp,
};
serde_json::to_vec_pretty(&json)
}
}
// Helper functions for varint encoding/decoding
fn write_varint(buf: &mut [u8], mut v: i64) -> usize {
let mut n = 0;
while v >= 0x80 {
buf[n] = (v as u8) | 0x80;
v >>= 7;
n += 1;
}
buf[n] = v as u8;
n + 1
}
fn read_varint(buf: &[u8]) -> io::Result<(i64, usize)> {
let mut result = 0i64;
let mut shift = 0;
let mut n = 0;
while n < buf.len() {
let byte = buf[n];
n += 1;
result |= ((byte & 0x7F) as i64) << shift;
if byte < 0x80 {
return Ok((result, n));
}
shift += 7;
}
Err(io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected EOF"))
}
// Helper functions for index header manipulation
#[allow(dead_code)]
pub fn remove_index_headers(b: &[u8]) -> Option<&[u8]> {
if b.len() < 4 + S2_INDEX_TRAILER.len() {
return None;
}
// Skip size
let b = &b[4..];
// Check trailer
if !b.starts_with(S2_INDEX_TRAILER) {
return None;
}
Some(&b[S2_INDEX_TRAILER.len()..])
}
#[allow(dead_code)]
pub fn restore_index_headers(in_data: &[u8]) -> Vec<u8> {
if in_data.is_empty() {
return Vec::new();
}
let mut b = Vec::with_capacity(4 + S2_INDEX_HEADER.len() + in_data.len() + S2_INDEX_TRAILER.len() + 4);
b.extend_from_slice(&[0x50, 0x2A, 0x4D, 0x18]);
b.extend_from_slice(S2_INDEX_HEADER);
b.extend_from_slice(in_data);
let total_size = (b.len() + 4 + S2_INDEX_TRAILER.len()) as u32;
b.extend_from_slice(&total_size.to_le_bytes());
b.extend_from_slice(S2_INDEX_TRAILER);
let chunk_len = b.len() - 4;
b[1] = chunk_len as u8;
b[2] = (chunk_len >> 8) as u8;
b[3] = (chunk_len >> 16) as u8;
b
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_index_new() {
let index = Index::new();
assert_eq!(index.total_uncompressed, -1);
assert_eq!(index.total_compressed, -1);
assert!(index.info.is_empty());
assert_eq!(index.est_block_uncomp, 0);
}
#[test]
fn test_index_add() -> io::Result<()> {
let mut index = Index::new();
// Test adding the first entry
index.add(100, 1000)?;
assert_eq!(index.info.len(), 1);
assert_eq!(index.info[0].compressed_offset, 100);
assert_eq!(index.info[0].uncompressed_offset, 1000);
// Test adding an entry with the same uncompressed offset
index.add(200, 1000)?;
assert_eq!(index.info.len(), 1);
assert_eq!(index.info[0].compressed_offset, 200);
assert_eq!(index.info[0].uncompressed_offset, 1000);
// Test adding a new entry (make sure the distance is large enough)
index.add(300, 2000 + MIN_INDEX_DIST)?;
assert_eq!(index.info.len(), 2);
assert_eq!(index.info[1].compressed_offset, 300);
assert_eq!(index.info[1].uncompressed_offset, 2000 + MIN_INDEX_DIST);
Ok(())
}
#[test]
fn test_index_add_errors() {
let mut index = Index::new();
// Add an initial entry
index.add(100, 1000).unwrap();
// Test adding a smaller uncompressed offset
let err = index.add(200, 500).unwrap_err();
assert_eq!(err.kind(), io::ErrorKind::InvalidData);
// Test adding a smaller compressed offset
let err = index.add(50, 2000).unwrap_err();
assert_eq!(err.kind(), io::ErrorKind::InvalidData);
}
#[test]
fn test_index_find() -> io::Result<()> {
let mut index = Index::new();
index.total_uncompressed = 1000 + MIN_INDEX_DIST * 3;
index.total_compressed = 5000;
// Add some test data, making sure entry spacing satisfies MIN_INDEX_DIST
index.add(100, 1000)?;
index.add(300, 1000 + MIN_INDEX_DIST)?;
index.add(500, 1000 + MIN_INDEX_DIST * 2)?;
// Test finding an existing offset
let (comp, uncomp) = index.find(1500)?;
assert_eq!(comp, 100);
assert_eq!(uncomp, 1000);
// Test finding a boundary value
let (comp, uncomp) = index.find(1000 + MIN_INDEX_DIST)?;
assert_eq!(comp, 300);
assert_eq!(uncomp, 1000 + MIN_INDEX_DIST);
// Test finding the last entry
let (comp, uncomp) = index.find(1000 + MIN_INDEX_DIST * 2)?;
assert_eq!(comp, 500);
assert_eq!(uncomp, 1000 + MIN_INDEX_DIST * 2);
Ok(())
}
#[test]
fn test_index_find_errors() {
let mut index = Index::new();
index.total_uncompressed = 10000;
index.total_compressed = 5000;
// Test an uninitialized index
let uninit_index = Index::new();
let err = uninit_index.find(1000).unwrap_err();
assert_eq!(err.kind(), io::ErrorKind::Other);
// Test an out-of-range offset
let err = index.find(15000).unwrap_err();
assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof);
// Test a negative offset
let err = match index.find(-1000) {
Ok(_) => panic!("should be error"),
Err(e) => e,
};
assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof);
}
#[test]
fn test_index_reduce() {
let mut index = Index::new();
index.est_block_uncomp = MIN_INDEX_DIST;
// Add more entries than MAX_INDEX_ENTRIES, keeping spacing at MIN_INDEX_DIST
for i in 0..MAX_INDEX_ENTRIES + 100 {
index.add(i as i64 * 100, i as i64 * MIN_INDEX_DIST).unwrap();
}
// Call reduce manually
index.reduce();
// Verify that the entry count was reduced correctly
assert!(index.info.len() <= MAX_INDEX_ENTRIES);
}
#[test]
fn test_index_json() -> io::Result<()> {
let mut index = Index::new();
// Add some test data
index.add(100, 1000)?;
index.add(300, 2000 + MIN_INDEX_DIST)?;
// Test JSON serialization
let json = index.to_json().unwrap();
let json_str = String::from_utf8(json).unwrap();
println!("json_str: {}", json_str);
// Verify the JSON content
assert!(json_str.contains("\"compressed\": 100"));
assert!(json_str.contains("\"uncompressed\": 1000"));
assert!(json_str.contains("\"est_block_uncompressed\": 0"));
Ok(())
}
}
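A short usage sketch for the seek index above, mirroring its unit tests. The import path is an assumption (inside the crate the module is used as `crate::compress_index::Index`), and MIN_INDEX_DIST is the 1 MiB constant defined at the top of the file:

```rust
use std::io;

use rustfs_rio::compress_index::Index; // path is an assumption

fn locate_block() -> io::Result<()> {
    let mut index = Index::new();
    // Record (compressed, uncompressed) offset pairs. add() silently skips
    // entries closer than MIN_INDEX_DIST (1 MiB) to the previous one.
    index.add(0, 0)?;
    index.add(400_000, 1 << 20)?;
    index.add(800_000, 2 << 20)?;

    // find() returns the last recorded pair at or before the requested
    // uncompressed offset, so a ranged GET can start decompressing there.
    let (compressed_off, uncompressed_off) = index.find(1_500_000)?;
    assert_eq!((compressed_off, uncompressed_off), (400_000, 1 << 20));

    // to_json() pretty-prints the recorded offsets for debugging.
    let json = index.to_json().map_err(io::Error::other)?;
    println!("{}", String::from_utf8_lossy(&json));
    Ok(())
}
```

CompressReader feeds this index one entry per emitted block (see compress.rs below), while EncryptReader, EtagReader, HardLimitReader, and HashReader simply forward try_get_index() to their inner reader.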

View File

@@ -1,12 +1,22 @@
use crate::compress::{CompressionAlgorithm, compress_block, decompress_block};
use crate::compress_index::{Index, TryGetIndex};
use crate::{EtagResolvable, HashReaderDetector};
use crate::{HashReaderMut, Reader};
use pin_project_lite::pin_project;
use rustfs_utils::{put_uvarint, put_uvarint_len, uvarint};
use rustfs_utils::compress::{CompressionAlgorithm, compress_block, decompress_block};
use rustfs_utils::{put_uvarint, uvarint};
use std::cmp::min;
use std::io::{self};
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::{AsyncRead, ReadBuf};
// use tracing::error;
const COMPRESS_TYPE_COMPRESSED: u8 = 0x00;
const COMPRESS_TYPE_UNCOMPRESSED: u8 = 0x01;
const COMPRESS_TYPE_END: u8 = 0xFF;
const DEFAULT_BLOCK_SIZE: usize = 1 << 20; // 1MB
const HEADER_LEN: usize = 8;
pin_project! {
#[derive(Debug)]
@@ -19,6 +29,11 @@ pin_project! {
done: bool,
block_size: usize,
compression_algorithm: CompressionAlgorithm,
index: Index,
written: usize,
uncomp_written: usize,
temp_buffer: Vec<u8>,
temp_pos: usize,
}
}
@@ -33,7 +48,12 @@ where
pos: 0,
done: false,
compression_algorithm,
block_size: 1 << 20, // Default 1MB
block_size: DEFAULT_BLOCK_SIZE,
index: Index::new(),
written: 0,
uncomp_written: 0,
temp_buffer: Vec::with_capacity(DEFAULT_BLOCK_SIZE), // Pre-allocate capacity
temp_pos: 0,
}
}
@@ -46,19 +66,33 @@ where
done: false,
compression_algorithm,
block_size,
index: Index::new(),
written: 0,
uncomp_written: 0,
temp_buffer: Vec::with_capacity(block_size),
temp_pos: 0,
}
}
}
impl<R> TryGetIndex for CompressReader<R>
where
R: Reader,
{
fn try_get_index(&self) -> Option<&Index> {
Some(&self.index)
}
}
impl<R> AsyncRead for CompressReader<R>
where
R: AsyncRead + Unpin + Send + Sync,
{
fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<io::Result<()>> {
let mut this = self.project();
// If buffer has data, serve from buffer first
// Copy from buffer first if available
if *this.pos < this.buffer.len() {
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len() - *this.pos);
let to_copy = min(buf.remaining(), this.buffer.len() - *this.pos);
buf.put_slice(&this.buffer[*this.pos..*this.pos + to_copy]);
*this.pos += to_copy;
if *this.pos == this.buffer.len() {
@@ -67,74 +101,60 @@ where
}
return Poll::Ready(Ok(()));
}
if *this.done {
return Poll::Ready(Ok(()));
}
// Read from inner, only read block_size bytes each time
let mut temp = vec![0u8; *this.block_size];
let mut temp_buf = ReadBuf::new(&mut temp);
match this.inner.as_mut().poll_read(cx, &mut temp_buf) {
Poll::Pending => Poll::Pending,
Poll::Ready(Ok(())) => {
let n = temp_buf.filled().len();
if n == 0 {
// EOF, write end header
let mut header = [0u8; 8];
header[0] = 0xFF;
*this.buffer = header.to_vec();
*this.pos = 0;
*this.done = true;
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len());
buf.put_slice(&this.buffer[..to_copy]);
*this.pos += to_copy;
Poll::Ready(Ok(()))
} else {
let uncompressed_data = &temp_buf.filled()[..n];
let crc = crc32fast::hash(uncompressed_data);
let compressed_data = compress_block(uncompressed_data, *this.compression_algorithm);
let uncompressed_len = n;
let compressed_len = compressed_data.len();
let int_len = put_uvarint_len(uncompressed_len as u64);
let len = compressed_len + int_len + 4; // 4 bytes for CRC32
// Header: 8 bytes
// 0: type (0 = compressed, 1 = uncompressed, 0xFF = end)
// 1-3: length (little endian u24)
// 4-7: crc32 (little endian u32)
let mut header = [0u8; 8];
header[0] = 0x00; // 0 = compressed
header[1] = (len & 0xFF) as u8;
header[2] = ((len >> 8) & 0xFF) as u8;
header[3] = ((len >> 16) & 0xFF) as u8;
header[4] = (crc & 0xFF) as u8;
header[5] = ((crc >> 8) & 0xFF) as u8;
header[6] = ((crc >> 16) & 0xFF) as u8;
header[7] = ((crc >> 24) & 0xFF) as u8;
// Combine header(4+4) + uncompressed_len + compressed
let mut out = Vec::with_capacity(len + 4);
out.extend_from_slice(&header);
let mut uncompressed_len_buf = vec![0u8; int_len];
put_uvarint(&mut uncompressed_len_buf, uncompressed_len as u64);
out.extend_from_slice(&uncompressed_len_buf);
out.extend_from_slice(&compressed_data);
*this.buffer = out;
*this.pos = 0;
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len());
buf.put_slice(&this.buffer[..to_copy]);
*this.pos += to_copy;
Poll::Ready(Ok(()))
// Fill temporary buffer
while this.temp_buffer.len() < *this.block_size {
let remaining = *this.block_size - this.temp_buffer.len();
let mut temp = vec![0u8; remaining];
let mut temp_buf = ReadBuf::new(&mut temp);
match this.inner.as_mut().poll_read(cx, &mut temp_buf) {
Poll::Pending => {
if this.temp_buffer.is_empty() {
return Poll::Pending;
}
break;
}
Poll::Ready(Ok(())) => {
let n = temp_buf.filled().len();
if n == 0 {
if this.temp_buffer.is_empty() {
return Poll::Ready(Ok(()));
}
break;
}
this.temp_buffer.extend_from_slice(&temp[..n]);
}
Poll::Ready(Err(e)) => {
// error!("CompressReader poll_read: read inner error: {e}");
return Poll::Ready(Err(e));
}
}
Poll::Ready(Err(e)) => Poll::Ready(Err(e)),
}
// Process accumulated data
if !this.temp_buffer.is_empty() {
let uncompressed_data = &this.temp_buffer;
let out = build_compressed_block(uncompressed_data, *this.compression_algorithm);
*this.written += out.len();
*this.uncomp_written += uncompressed_data.len();
if let Err(e) = this.index.add(*this.written as i64, *this.uncomp_written as i64) {
// error!("CompressReader index add error: {e}");
return Poll::Ready(Err(e));
}
*this.buffer = out;
*this.pos = 0;
this.temp_buffer.truncate(0); // More efficient way to clear
let to_copy = min(buf.remaining(), this.buffer.len());
buf.put_slice(&this.buffer[..to_copy]);
*this.pos += to_copy;
if *this.pos == this.buffer.len() {
this.buffer.clear();
*this.pos = 0;
}
Poll::Ready(Ok(()))
} else {
Poll::Pending
}
}
}
@@ -163,9 +183,10 @@ where
pin_project! {
/// A reader wrapper that decompresses data on the fly using the configured compression algorithm.
// 1~3 bytes store the length of the compressed data
// The first byte stores the type of the compressed data: 00 = compressed, 01 = uncompressed
// The first 4 bytes store the CRC32 checksum of the compressed data
/// Header format:
/// - First byte: compression type (00 = compressed, 01 = uncompressed, FF = end)
/// - Bytes 1-3: length of compressed data (little-endian)
/// - Bytes 4-7: CRC32 checksum of uncompressed data (little-endian)
#[derive(Debug)]
pub struct DecompressReader<R> {
#[pin]
@@ -173,11 +194,11 @@ pin_project! {
buffer: Vec<u8>,
buffer_pos: usize,
finished: bool,
// New fields for saving header read progress across polls
// Fields for saving header read progress across polls
header_buf: [u8; 8],
header_read: usize,
header_done: bool,
// New fields for saving compressed block read progress across polls
// Fields for saving compressed block read progress across polls
compressed_buf: Option<Vec<u8>>,
compressed_read: usize,
compressed_len: usize,
@@ -187,7 +208,7 @@ pin_project! {
impl<R> DecompressReader<R>
where
R: Reader,
R: AsyncRead + Unpin + Send + Sync,
{
pub fn new(inner: R, compression_algorithm: CompressionAlgorithm) -> Self {
Self {
@@ -212,9 +233,9 @@ where
{
fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<io::Result<()>> {
let mut this = self.project();
// Serve from buffer if any
// Copy from buffer first if available
if *this.buffer_pos < this.buffer.len() {
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len() - *this.buffer_pos);
let to_copy = min(buf.remaining(), this.buffer.len() - *this.buffer_pos);
buf.put_slice(&this.buffer[*this.buffer_pos..*this.buffer_pos + to_copy]);
*this.buffer_pos += to_copy;
if *this.buffer_pos == this.buffer.len() {
@@ -223,15 +244,13 @@ where
}
return Poll::Ready(Ok(()));
}
if *this.finished {
return Poll::Ready(Ok(()));
}
// Read header, support saving progress across polls
while !*this.header_done && *this.header_read < 8 {
let mut temp = [0u8; 8];
let mut temp_buf = ReadBuf::new(&mut temp[0..8 - *this.header_read]);
// Read header
while !*this.header_done && *this.header_read < HEADER_LEN {
let mut temp = [0u8; HEADER_LEN];
let mut temp_buf = ReadBuf::new(&mut temp[0..HEADER_LEN - *this.header_read]);
match this.inner.as_mut().poll_read(cx, &mut temp_buf) {
Poll::Pending => return Poll::Pending,
Poll::Ready(Ok(())) => {
@@ -243,34 +262,27 @@ where
*this.header_read += n;
}
Poll::Ready(Err(e)) => {
// error!("DecompressReader poll_read: read header error: {e}");
return Poll::Ready(Err(e));
}
}
if *this.header_read < 8 {
// Header not fully read, return Pending or Ok, wait for next poll
if *this.header_read < HEADER_LEN {
return Poll::Pending;
}
}
if !*this.header_done && *this.header_read == 0 {
return Poll::Ready(Ok(()));
}
let typ = this.header_buf[0];
let len = (this.header_buf[1] as usize) | ((this.header_buf[2] as usize) << 8) | ((this.header_buf[3] as usize) << 16);
let crc = (this.header_buf[4] as u32)
| ((this.header_buf[5] as u32) << 8)
| ((this.header_buf[6] as u32) << 16)
| ((this.header_buf[7] as u32) << 24);
// Header is used up, reset header_read
*this.header_read = 0;
*this.header_done = true;
if typ == 0xFF {
*this.finished = true;
return Poll::Ready(Ok(()));
}
// Save compressed block read progress across polls
if this.compressed_buf.is_none() {
*this.compressed_len = len - 4;
*this.compressed_len = len;
*this.compressed_buf = Some(vec![0u8; *this.compressed_len]);
*this.compressed_read = 0;
}
@@ -287,6 +299,7 @@ where
*this.compressed_read += n;
}
Poll::Ready(Err(e)) => {
// error!("DecompressReader poll_read: read compressed block error: {e}");
this.compressed_buf.take();
*this.compressed_read = 0;
*this.compressed_len = 0;
@@ -294,44 +307,44 @@ where
}
}
}
// After reading all, unpack
let (uncompress_len, uvarint) = uvarint(&compressed_buf[0..16]);
let compressed_data = &compressed_buf[uvarint as usize..];
let decompressed = if typ == 0x00 {
let decompressed = if typ == COMPRESS_TYPE_COMPRESSED {
match decompress_block(compressed_data, *this.compression_algorithm) {
Ok(out) => out,
Err(e) => {
// error!("DecompressReader decompress_block error: {e}");
this.compressed_buf.take();
*this.compressed_read = 0;
*this.compressed_len = 0;
return Poll::Ready(Err(e));
}
}
} else if typ == 0x01 {
} else if typ == COMPRESS_TYPE_UNCOMPRESSED {
compressed_data.to_vec()
} else if typ == 0xFF {
// Handle end marker
} else if typ == COMPRESS_TYPE_END {
this.compressed_buf.take();
*this.compressed_read = 0;
*this.compressed_len = 0;
*this.finished = true;
return Poll::Ready(Ok(()));
} else {
// error!("DecompressReader unknown compression type: {typ}");
this.compressed_buf.take();
*this.compressed_read = 0;
*this.compressed_len = 0;
return Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Unknown compression type")));
};
if decompressed.len() != uncompress_len as usize {
// error!("DecompressReader decompressed length mismatch: {} != {}", decompressed.len(), uncompress_len);
this.compressed_buf.take();
*this.compressed_read = 0;
*this.compressed_len = 0;
return Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Decompressed length mismatch")));
}
let actual_crc = crc32fast::hash(&decompressed);
if actual_crc != crc {
// error!("DecompressReader CRC32 mismatch: actual {actual_crc} != expected {crc}");
this.compressed_buf.take();
*this.compressed_read = 0;
*this.compressed_len = 0;
@@ -339,15 +352,17 @@ where
}
*this.buffer = decompressed;
*this.buffer_pos = 0;
// Clear compressed block state for next block
this.compressed_buf.take();
*this.compressed_read = 0;
*this.compressed_len = 0;
*this.header_done = false;
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len());
let to_copy = min(buf.remaining(), this.buffer.len());
buf.put_slice(&this.buffer[..to_copy]);
*this.buffer_pos += to_copy;
if *this.buffer_pos == this.buffer.len() {
this.buffer.clear();
*this.buffer_pos = 0;
}
Poll::Ready(Ok(()))
}
}
@@ -373,8 +388,34 @@ where
}
}
/// Build compressed block with header + uvarint + compressed data
fn build_compressed_block(uncompressed_data: &[u8], compression_algorithm: CompressionAlgorithm) -> Vec<u8> {
let crc = crc32fast::hash(uncompressed_data);
let compressed_data = compress_block(uncompressed_data, compression_algorithm);
let uncompressed_len = uncompressed_data.len();
let mut uncompressed_len_buf = [0u8; 10];
let int_len = put_uvarint(&mut uncompressed_len_buf[..], uncompressed_len as u64);
let len = compressed_data.len() + int_len;
let mut header = [0u8; HEADER_LEN];
header[0] = COMPRESS_TYPE_COMPRESSED;
header[1] = (len & 0xFF) as u8;
header[2] = ((len >> 8) & 0xFF) as u8;
header[3] = ((len >> 16) & 0xFF) as u8;
header[4] = (crc & 0xFF) as u8;
header[5] = ((crc >> 8) & 0xFF) as u8;
header[6] = ((crc >> 16) & 0xFF) as u8;
header[7] = ((crc >> 24) & 0xFF) as u8;
let mut out = Vec::with_capacity(len + HEADER_LEN);
out.extend_from_slice(&header);
out.extend_from_slice(&uncompressed_len_buf[..int_len]);
out.extend_from_slice(&compressed_data);
out
}
#[cfg(test)]
mod tests {
use crate::WarpReader;
use super::*;
use std::io::Cursor;
use tokio::io::{AsyncReadExt, BufReader};
@@ -383,7 +424,7 @@ mod tests {
async fn test_compress_reader_basic() {
let data = b"hello world, hello world, hello world!";
let reader = Cursor::new(&data[..]);
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Gzip);
let mut compress_reader = CompressReader::new(WarpReader::new(reader), CompressionAlgorithm::Gzip);
let mut compressed = Vec::new();
compress_reader.read_to_end(&mut compressed).await.unwrap();
@@ -400,7 +441,7 @@ mod tests {
async fn test_compress_reader_basic_deflate() {
let data = b"hello world, hello world, hello world!";
let reader = BufReader::new(&data[..]);
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Deflate);
let mut compress_reader = CompressReader::new(WarpReader::new(reader), CompressionAlgorithm::Deflate);
let mut compressed = Vec::new();
compress_reader.read_to_end(&mut compressed).await.unwrap();
@@ -417,7 +458,7 @@ mod tests {
async fn test_compress_reader_empty() {
let data = b"";
let reader = BufReader::new(&data[..]);
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Gzip);
let mut compress_reader = CompressReader::new(WarpReader::new(reader), CompressionAlgorithm::Gzip);
let mut compressed = Vec::new();
compress_reader.read_to_end(&mut compressed).await.unwrap();
@@ -436,7 +477,7 @@ mod tests {
let mut data = vec![0u8; 1024 * 1024];
rand::rng().fill(&mut data[..]);
let reader = Cursor::new(data.clone());
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Gzip);
let mut compress_reader = CompressReader::new(WarpReader::new(reader), CompressionAlgorithm::Gzip);
let mut compressed = Vec::new();
compress_reader.read_to_end(&mut compressed).await.unwrap();
@@ -452,15 +493,15 @@ mod tests {
async fn test_compress_reader_large_deflate() {
use rand::Rng;
// Generate 1MB of random bytes
let mut data = vec![0u8; 1024 * 1024];
let mut data = vec![0u8; 1024 * 1024 * 3 + 512];
rand::rng().fill(&mut data[..]);
let reader = Cursor::new(data.clone());
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Deflate);
let mut compress_reader = CompressReader::new(WarpReader::new(reader), CompressionAlgorithm::default());
let mut compressed = Vec::new();
compress_reader.read_to_end(&mut compressed).await.unwrap();
let mut decompress_reader = DecompressReader::new(Cursor::new(compressed.clone()), CompressionAlgorithm::Deflate);
let mut decompress_reader = DecompressReader::new(Cursor::new(compressed.clone()), CompressionAlgorithm::default());
let mut decompressed = Vec::new();
decompress_reader.read_to_end(&mut decompressed).await.unwrap();
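For reference, the framing the two readers above agree on is: an 8-byte header (block type, 3-byte little-endian payload length, 4-byte little-endian CRC32 of the uncompressed data), then a uvarint-encoded uncompressed length, then the payload. The length now covers only the uvarint plus payload, which is why DecompressReader no longer subtracts 4 for the CRC. A small decoding sketch of just the header, mirroring the code above:

```rust
/// Decode the 8-byte block header emitted by CompressReader
/// (see build_compressed_block above).
fn parse_block_header(h: &[u8; 8]) -> (u8, usize, u32) {
    let typ = h[0]; // 0x00 = compressed, 0x01 = uncompressed, 0xFF = end of stream
    let len = (h[1] as usize) | ((h[2] as usize) << 8) | ((h[3] as usize) << 16);
    let crc = u32::from_le_bytes([h[4], h[5], h[6], h[7]]);
    (typ, len, crc)
}
```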

View File

@@ -1,5 +1,6 @@
use crate::HashReaderDetector;
use crate::HashReaderMut;
use crate::compress_index::{Index, TryGetIndex};
use crate::{EtagResolvable, Reader};
use aes_gcm::aead::Aead;
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
@@ -145,6 +146,15 @@ where
}
}
impl<R> TryGetIndex for EncryptReader<R>
where
R: TryGetIndex,
{
fn try_get_index(&self) -> Option<&Index> {
self.inner.try_get_index()
}
}
pin_project! {
/// A reader wrapper that decrypts data on the fly using AES-256-GCM.
/// This is a demonstration. For production, use a secure and audited crypto library.
@@ -339,6 +349,8 @@ where
mod tests {
use std::io::Cursor;
use crate::WarpReader;
use super::*;
use rand::RngCore;
use tokio::io::{AsyncReadExt, BufReader};
@@ -352,7 +364,7 @@ mod tests {
rand::rng().fill_bytes(&mut nonce);
let reader = BufReader::new(&data[..]);
let encrypt_reader = EncryptReader::new(reader, key, nonce);
let encrypt_reader = EncryptReader::new(WarpReader::new(reader), key, nonce);
// Encrypt
let mut encrypt_reader = encrypt_reader;
@@ -361,7 +373,7 @@ mod tests {
// Decrypt using DecryptReader
let reader = Cursor::new(encrypted.clone());
let decrypt_reader = DecryptReader::new(reader, key, nonce);
let decrypt_reader = DecryptReader::new(WarpReader::new(reader), key, nonce);
let mut decrypt_reader = decrypt_reader;
let mut decrypted = Vec::new();
decrypt_reader.read_to_end(&mut decrypted).await.unwrap();
@@ -380,7 +392,7 @@ mod tests {
// Encrypt
let reader = BufReader::new(&data[..]);
let encrypt_reader = EncryptReader::new(reader, key, nonce);
let encrypt_reader = EncryptReader::new(WarpReader::new(reader), key, nonce);
let mut encrypt_reader = encrypt_reader;
let mut encrypted = Vec::new();
encrypt_reader.read_to_end(&mut encrypted).await.unwrap();
@@ -388,7 +400,7 @@ mod tests {
// Now test DecryptReader
let reader = Cursor::new(encrypted.clone());
let decrypt_reader = DecryptReader::new(reader, key, nonce);
let decrypt_reader = DecryptReader::new(WarpReader::new(reader), key, nonce);
let mut decrypt_reader = decrypt_reader;
let mut decrypted = Vec::new();
decrypt_reader.read_to_end(&mut decrypted).await.unwrap();
@@ -408,13 +420,13 @@ mod tests {
rand::rng().fill_bytes(&mut nonce);
let reader = std::io::Cursor::new(data.clone());
let encrypt_reader = EncryptReader::new(reader, key, nonce);
let encrypt_reader = EncryptReader::new(WarpReader::new(reader), key, nonce);
let mut encrypt_reader = encrypt_reader;
let mut encrypted = Vec::new();
encrypt_reader.read_to_end(&mut encrypted).await.unwrap();
let reader = std::io::Cursor::new(encrypted.clone());
let decrypt_reader = DecryptReader::new(reader, key, nonce);
let decrypt_reader = DecryptReader::new(WarpReader::new(reader), key, nonce);
let mut decrypt_reader = decrypt_reader;
let mut decrypted = Vec::new();
decrypt_reader.read_to_end(&mut decrypted).await.unwrap();

View File

@@ -17,14 +17,15 @@ The `EtagResolvable` trait provides a clean way to handle recursive unwrapping:
```rust
use rustfs_rio::{CompressReader, EtagReader, resolve_etag_generic};
use rustfs_rio::compress::CompressionAlgorithm;
use rustfs_rio::WarpReader;
use rustfs_utils::compress::CompressionAlgorithm;
use tokio::io::BufReader;
use std::io::Cursor;
// Direct usage with trait-based approach
let data = b"test data";
let reader = BufReader::new(Cursor::new(&data[..]));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let etag_reader = EtagReader::new(reader, Some("test_etag".to_string()));
let mut reader = CompressReader::new(etag_reader, CompressionAlgorithm::Gzip);
let etag = resolve_etag_generic(&mut reader);
@@ -34,9 +35,9 @@ let etag = resolve_etag_generic(&mut reader);
#[cfg(test)]
mod tests {
use crate::compress::CompressionAlgorithm;
use crate::resolve_etag_generic;
use crate::{CompressReader, EncryptReader, EtagReader, HashReader};
use crate::{WarpReader, resolve_etag_generic};
use rustfs_utils::compress::CompressionAlgorithm;
use std::io::Cursor;
use tokio::io::BufReader;
@@ -44,7 +45,7 @@ mod tests {
fn test_etag_reader_resolution() {
let data = b"test data";
let reader = BufReader::new(Cursor::new(&data[..]));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let mut etag_reader = EtagReader::new(reader, Some("test_etag".to_string()));
// Test direct ETag resolution
@@ -55,7 +56,7 @@ mod tests {
fn test_hash_reader_resolution() {
let data = b"test data";
let reader = BufReader::new(Cursor::new(&data[..]));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let mut hash_reader =
HashReader::new(reader, data.len() as i64, data.len() as i64, Some("hash_etag".to_string()), false).unwrap();
@@ -67,7 +68,7 @@ mod tests {
fn test_compress_reader_delegation() {
let data = b"test data for compression";
let reader = BufReader::new(Cursor::new(&data[..]));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let etag_reader = EtagReader::new(reader, Some("compress_etag".to_string()));
let mut compress_reader = CompressReader::new(etag_reader, CompressionAlgorithm::Gzip);
@@ -79,7 +80,7 @@ mod tests {
fn test_encrypt_reader_delegation() {
let data = b"test data for encryption";
let reader = BufReader::new(Cursor::new(&data[..]));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let etag_reader = EtagReader::new(reader, Some("encrypt_etag".to_string()));
let key = [0u8; 32];
@@ -94,7 +95,7 @@ mod tests {
fn test_complex_nesting() {
let data = b"test data for complex nesting";
let reader = BufReader::new(Cursor::new(&data[..]));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
// Create a complex nested structure: CompressReader<EncryptReader<EtagReader<BufReader<Cursor>>>>
let etag_reader = EtagReader::new(reader, Some("nested_etag".to_string()));
let key = [0u8; 32];
@@ -110,7 +111,7 @@ mod tests {
fn test_hash_reader_in_nested_structure() {
let data = b"test data for hash reader nesting";
let reader = BufReader::new(Cursor::new(&data[..]));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
// Create nested structure: CompressReader<HashReader<BufReader<Cursor>>>
let hash_reader =
HashReader::new(reader, data.len() as i64, data.len() as i64, Some("hash_nested_etag".to_string()), false).unwrap();
@@ -127,14 +128,14 @@ mod tests {
// Test 1: Simple EtagReader
let data1 = b"simple test";
let reader1 = BufReader::new(Cursor::new(&data1[..]));
let reader1 = Box::new(reader1);
let reader1 = Box::new(WarpReader::new(reader1));
let mut etag_reader = EtagReader::new(reader1, Some("simple_etag".to_string()));
assert_eq!(resolve_etag_generic(&mut etag_reader), Some("simple_etag".to_string()));
// Test 2: HashReader with ETag
let data2 = b"hash test";
let reader2 = BufReader::new(Cursor::new(&data2[..]));
let reader2 = Box::new(reader2);
let reader2 = Box::new(WarpReader::new(reader2));
let mut hash_reader =
HashReader::new(reader2, data2.len() as i64, data2.len() as i64, Some("hash_etag".to_string()), false).unwrap();
assert_eq!(resolve_etag_generic(&mut hash_reader), Some("hash_etag".to_string()));
@@ -142,7 +143,7 @@ mod tests {
// Test 3: Single wrapper - CompressReader<EtagReader>
let data3 = b"compress test";
let reader3 = BufReader::new(Cursor::new(&data3[..]));
let reader3 = Box::new(reader3);
let reader3 = Box::new(WarpReader::new(reader3));
let etag_reader3 = EtagReader::new(reader3, Some("compress_wrapped_etag".to_string()));
let mut compress_reader = CompressReader::new(etag_reader3, CompressionAlgorithm::Zstd);
assert_eq!(resolve_etag_generic(&mut compress_reader), Some("compress_wrapped_etag".to_string()));
@@ -150,7 +151,7 @@ mod tests {
// Test 4: Double wrapper - CompressReader<EncryptReader<EtagReader>>
let data4 = b"double wrap test";
let reader4 = BufReader::new(Cursor::new(&data4[..]));
let reader4 = Box::new(reader4);
let reader4 = Box::new(WarpReader::new(reader4));
let etag_reader4 = EtagReader::new(reader4, Some("double_wrapped_etag".to_string()));
let key = [1u8; 32];
let nonce = [1u8; 12];
@@ -172,7 +173,7 @@ mod tests {
let data = b"Real world test data that might be compressed and encrypted";
let base_reader = BufReader::new(Cursor::new(&data[..]));
let base_reader = Box::new(base_reader);
let base_reader = Box::new(WarpReader::new(base_reader));
// Create a complex nested structure that might occur in practice:
// CompressReader<EncryptReader<HashReader<BufReader<Cursor>>>>
let hash_reader = HashReader::new(
@@ -197,7 +198,7 @@ mod tests {
// Test another complex nesting with EtagReader at the core
let data2 = b"Another real world scenario";
let base_reader2 = BufReader::new(Cursor::new(&data2[..]));
let base_reader2 = Box::new(base_reader2);
let base_reader2 = Box::new(WarpReader::new(base_reader2));
let etag_reader = EtagReader::new(base_reader2, Some("core_etag".to_string()));
let key2 = [99u8; 32];
let nonce2 = [88u8; 12];
@@ -223,21 +224,21 @@ mod tests {
// Test with HashReader that has no etag
let data = b"no etag test";
let reader = BufReader::new(Cursor::new(&data[..]));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let mut hash_reader_no_etag = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap();
assert_eq!(resolve_etag_generic(&mut hash_reader_no_etag), None);
// Test with EtagReader that has None etag
let data2 = b"no etag test 2";
let reader2 = BufReader::new(Cursor::new(&data2[..]));
let reader2 = Box::new(reader2);
let reader2 = Box::new(WarpReader::new(reader2));
let mut etag_reader_none = EtagReader::new(reader2, None);
assert_eq!(resolve_etag_generic(&mut etag_reader_none), None);
// Test nested structure with no ETag at the core
let data3 = b"nested no etag test";
let reader3 = BufReader::new(Cursor::new(&data3[..]));
let reader3 = Box::new(reader3);
let reader3 = Box::new(WarpReader::new(reader3));
let etag_reader3 = EtagReader::new(reader3, None);
let mut compress_reader3 = CompressReader::new(etag_reader3, CompressionAlgorithm::Gzip);
assert_eq!(resolve_etag_generic(&mut compress_reader3), None);

View File

@@ -1,3 +1,4 @@
use crate::compress_index::{Index, TryGetIndex};
use crate::{EtagResolvable, HashReaderDetector, HashReaderMut, Reader};
use md5::{Digest, Md5};
use pin_project_lite::pin_project;
@@ -82,8 +83,16 @@ impl HashReaderDetector for EtagReader {
}
}
impl TryGetIndex for EtagReader {
fn try_get_index(&self) -> Option<&Index> {
self.inner.try_get_index()
}
}
#[cfg(test)]
mod tests {
use crate::WarpReader;
use super::*;
use std::io::Cursor;
use tokio::io::{AsyncReadExt, BufReader};
@@ -95,7 +104,7 @@ mod tests {
hasher.update(data);
let expected = format!("{:x}", hasher.finalize());
let reader = BufReader::new(&data[..]);
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let mut etag_reader = EtagReader::new(reader, None);
let mut buf = Vec::new();
@@ -114,7 +123,7 @@ mod tests {
hasher.update(data);
let expected = format!("{:x}", hasher.finalize());
let reader = BufReader::new(&data[..]);
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let mut etag_reader = EtagReader::new(reader, None);
let mut buf = Vec::new();
@@ -133,7 +142,7 @@ mod tests {
hasher.update(data);
let expected = format!("{:x}", hasher.finalize());
let reader = BufReader::new(&data[..]);
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let mut etag_reader = EtagReader::new(reader, None);
let mut buf = Vec::new();
@@ -150,7 +159,7 @@ mod tests {
async fn test_etag_reader_not_finished() {
let data = b"abc123";
let reader = BufReader::new(&data[..]);
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let mut etag_reader = EtagReader::new(reader, None);
// Do not read to end, etag should be None
@@ -174,7 +183,7 @@ mod tests {
let expected = format!("{:x}", hasher.finalize());
let reader = Cursor::new(data.clone());
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let mut etag_reader = EtagReader::new(reader, None);
let mut buf = Vec::new();
@@ -193,7 +202,7 @@ mod tests {
hasher.update(data);
let expected = format!("{:x}", hasher.finalize());
let reader = BufReader::new(&data[..]);
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let mut etag_reader = EtagReader::new(reader, Some(expected.clone()));
let mut buf = Vec::new();
@@ -209,7 +218,7 @@ mod tests {
let data = b"checksum test data";
let wrong_checksum = "deadbeefdeadbeefdeadbeefdeadbeef".to_string();
let reader = BufReader::new(&data[..]);
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let mut etag_reader = EtagReader::new(reader, Some(wrong_checksum));
let mut buf = Vec::new();

View File

@@ -1,12 +1,11 @@
use crate::compress_index::{Index, TryGetIndex};
use crate::{EtagResolvable, HashReaderDetector, HashReaderMut, Reader};
use pin_project_lite::pin_project;
use std::io::{Error, Result};
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::{AsyncRead, ReadBuf};
use crate::{EtagResolvable, HashReaderDetector, HashReaderMut, Reader};
use pin_project_lite::pin_project;
pin_project! {
pub struct HardLimitReader {
#[pin]
@@ -60,10 +59,18 @@ impl HashReaderDetector for HardLimitReader {
}
}
impl TryGetIndex for HardLimitReader {
fn try_get_index(&self) -> Option<&Index> {
self.inner.try_get_index()
}
}
#[cfg(test)]
mod tests {
use std::vec;
use crate::WarpReader;
use super::*;
use rustfs_utils::read_full;
use tokio::io::{AsyncReadExt, BufReader};
@@ -72,7 +79,7 @@ mod tests {
async fn test_hardlimit_reader_normal() {
let data = b"hello world";
let reader = BufReader::new(&data[..]);
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let hardlimit = HardLimitReader::new(reader, 20);
let mut r = hardlimit;
let mut buf = Vec::new();
@@ -85,7 +92,7 @@ mod tests {
async fn test_hardlimit_reader_exact_limit() {
let data = b"1234567890";
let reader = BufReader::new(&data[..]);
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let hardlimit = HardLimitReader::new(reader, 10);
let mut r = hardlimit;
let mut buf = Vec::new();
@@ -98,7 +105,7 @@ mod tests {
async fn test_hardlimit_reader_exceed_limit() {
let data = b"abcdef";
let reader = BufReader::new(&data[..]);
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let hardlimit = HardLimitReader::new(reader, 3);
let mut r = hardlimit;
let mut buf = vec![0u8; 10];
@@ -123,7 +130,7 @@ mod tests {
async fn test_hardlimit_reader_empty() {
let data = b"";
let reader = BufReader::new(&data[..]);
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let hardlimit = HardLimitReader::new(reader, 5);
let mut r = hardlimit;
let mut buf = Vec::new();

View File

@@ -24,11 +24,12 @@
//! use rustfs_rio::{HashReader, HardLimitReader, EtagReader};
//! use tokio::io::BufReader;
//! use std::io::Cursor;
//! use rustfs_rio::WarpReader;
//!
//! # tokio_test::block_on(async {
//! let data = b"hello world";
//! let reader = BufReader::new(Cursor::new(&data[..]));
//! let reader = Box::new(reader);
//! let reader = Box::new(WarpReader::new(reader));
//! let size = data.len() as i64;
//! let actual_size = size;
//! let etag = None;
@@ -39,7 +40,7 @@
//!
//! // Method 2: With manual wrapping to recreate original logic
//! let reader2 = BufReader::new(Cursor::new(&data[..]));
//! let reader2 = Box::new(reader2);
//! let reader2 = Box::new(WarpReader::new(reader2));
//! let wrapped_reader: Box<dyn rustfs_rio::Reader> = if size > 0 {
//! if !diskable_md5 {
//! // Wrap with both HardLimitReader and EtagReader
@@ -68,18 +69,19 @@
//! use rustfs_rio::{HashReader, HashReaderDetector};
//! use tokio::io::BufReader;
//! use std::io::Cursor;
//! use rustfs_rio::WarpReader;
//!
//! # tokio_test::block_on(async {
//! let data = b"test";
//! let reader = BufReader::new(Cursor::new(&data[..]));
//! let hash_reader = HashReader::new(Box::new(reader), 4, 4, None, false).unwrap();
//! let hash_reader = HashReader::new(Box::new(WarpReader::new(reader)), 4, 4, None, false).unwrap();
//!
//! // Check if a type is a HashReader
//! assert!(hash_reader.is_hash_reader());
//!
//! // Use new for compatibility (though it's simpler to use new() directly)
//! let reader2 = BufReader::new(Cursor::new(&data[..]));
//! let result = HashReader::new(Box::new(reader2), 4, 4, None, false);
//! let result = HashReader::new(Box::new(WarpReader::new(reader2)), 4, 4, None, false);
//! assert!(result.is_ok());
//! # });
//! ```
@@ -89,6 +91,7 @@ use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::{AsyncRead, ReadBuf};
use crate::compress_index::{Index, TryGetIndex};
use crate::{EtagReader, EtagResolvable, HardLimitReader, HashReaderDetector, Reader};
/// Trait for mutable operations on HashReader
@@ -283,10 +286,16 @@ impl HashReaderDetector for HashReader {
}
}
impl TryGetIndex for HashReader {
fn try_get_index(&self) -> Option<&Index> {
self.inner.try_get_index()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{DecryptReader, encrypt_reader};
use crate::{DecryptReader, WarpReader, encrypt_reader};
use std::io::Cursor;
use tokio::io::{AsyncReadExt, BufReader};
@@ -299,14 +308,14 @@ mod tests {
// Test 1: Simple creation
let reader1 = BufReader::new(Cursor::new(&data[..]));
let reader1 = Box::new(reader1);
let reader1 = Box::new(WarpReader::new(reader1));
let hash_reader1 = HashReader::new(reader1, size, actual_size, etag.clone(), false).unwrap();
assert_eq!(hash_reader1.size(), size);
assert_eq!(hash_reader1.actual_size(), actual_size);
// Test 2: With HardLimitReader wrapping
let reader2 = BufReader::new(Cursor::new(&data[..]));
let reader2 = Box::new(reader2);
let reader2 = Box::new(WarpReader::new(reader2));
let hard_limit = HardLimitReader::new(reader2, size);
let hard_limit = Box::new(hard_limit);
let hash_reader2 = HashReader::new(hard_limit, size, actual_size, etag.clone(), false).unwrap();
@@ -315,7 +324,7 @@ mod tests {
// Test 3: With EtagReader wrapping
let reader3 = BufReader::new(Cursor::new(&data[..]));
let reader3 = Box::new(reader3);
let reader3 = Box::new(WarpReader::new(reader3));
let etag_reader = EtagReader::new(reader3, etag.clone());
let etag_reader = Box::new(etag_reader);
let hash_reader3 = HashReader::new(etag_reader, size, actual_size, etag.clone(), false).unwrap();
@@ -327,7 +336,7 @@ mod tests {
async fn test_hashreader_etag_basic() {
let data = b"hello hashreader";
let reader = BufReader::new(Cursor::new(&data[..]));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let mut hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap();
let mut buf = Vec::new();
let _ = hash_reader.read_to_end(&mut buf).await.unwrap();
@@ -341,7 +350,7 @@ mod tests {
async fn test_hashreader_diskable_md5() {
let data = b"no etag";
let reader = BufReader::new(Cursor::new(&data[..]));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let mut hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, true).unwrap();
let mut buf = Vec::new();
let _ = hash_reader.read_to_end(&mut buf).await.unwrap();
@@ -355,11 +364,11 @@ mod tests {
async fn test_hashreader_new_logic() {
let data = b"test data";
let reader = BufReader::new(Cursor::new(&data[..]));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
// Create a HashReader first
let hash_reader =
HashReader::new(reader, data.len() as i64, data.len() as i64, Some("test_etag".to_string()), false).unwrap();
let hash_reader = Box::new(hash_reader);
let hash_reader = Box::new(WarpReader::new(hash_reader));
// Now try to create another HashReader from the existing one using new
let result = HashReader::new(hash_reader, data.len() as i64, data.len() as i64, Some("test_etag".to_string()), false);
@@ -371,11 +380,11 @@ mod tests {
#[tokio::test]
async fn test_for_wrapping_readers() {
use crate::compress::CompressionAlgorithm;
use crate::{CompressReader, DecompressReader};
use md5::{Digest, Md5};
use rand::Rng;
use rand::RngCore;
use rustfs_utils::compress::CompressionAlgorithm;
// Generate 1MB random data
let size = 1024 * 1024;
@@ -397,7 +406,7 @@ mod tests {
let size = data.len() as i64;
let actual_size = data.len() as i64;
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
// Create the HashReader
let mut hr = HashReader::new(reader, size, actual_size, Some(expected.clone()), false).unwrap();
@@ -427,7 +436,7 @@ mod tests {
if is_encrypt {
// Encrypt the compressed data
let encrypt_reader = encrypt_reader::EncryptReader::new(Cursor::new(compressed_data), key, nonce);
let encrypt_reader = encrypt_reader::EncryptReader::new(WarpReader::new(Cursor::new(compressed_data)), key, nonce);
let mut encrypted_data = Vec::new();
let mut encrypt_reader = encrypt_reader;
encrypt_reader.read_to_end(&mut encrypted_data).await.unwrap();
@@ -435,14 +444,15 @@ mod tests {
println!("Encrypted size: {}", encrypted_data.len());
// Decrypt the data
let decrypt_reader = DecryptReader::new(Cursor::new(encrypted_data), key, nonce);
let decrypt_reader = DecryptReader::new(WarpReader::new(Cursor::new(encrypted_data)), key, nonce);
let mut decrypt_reader = decrypt_reader;
let mut decrypted_data = Vec::new();
decrypt_reader.read_to_end(&mut decrypted_data).await.unwrap();
if is_compress {
// If compression was applied, decompress first
let decompress_reader = DecompressReader::new(Cursor::new(decrypted_data), CompressionAlgorithm::Gzip);
let decompress_reader =
DecompressReader::new(WarpReader::new(Cursor::new(decrypted_data)), CompressionAlgorithm::Gzip);
let mut decompress_reader = decompress_reader;
let mut final_data = Vec::new();
decompress_reader.read_to_end(&mut final_data).await.unwrap();
@@ -460,7 +470,8 @@ mod tests {
// Without encryption, handle compression/decompression directly
if is_compress {
let decompress_reader = DecompressReader::new(Cursor::new(compressed_data), CompressionAlgorithm::Gzip);
let decompress_reader =
DecompressReader::new(WarpReader::new(Cursor::new(compressed_data)), CompressionAlgorithm::Gzip);
let mut decompress_reader = decompress_reader;
let mut decompressed = Vec::new();
decompress_reader.read_to_end(&mut decompressed).await.unwrap();
@@ -481,8 +492,8 @@ mod tests {
#[tokio::test]
async fn test_compression_with_compressible_data() {
use crate::compress::CompressionAlgorithm;
use crate::{CompressReader, DecompressReader};
use rustfs_utils::compress::CompressionAlgorithm;
// Create highly compressible data (repeated pattern)
let pattern = b"Hello, World! This is a test pattern that should compress well. ";
@@ -495,7 +506,7 @@ mod tests {
println!("Original data size: {} bytes", data.len());
let reader = BufReader::new(Cursor::new(data.clone()));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap();
// Test compression
@@ -525,8 +536,8 @@ mod tests {
#[tokio::test]
async fn test_compression_algorithms() {
use crate::compress::CompressionAlgorithm;
use crate::{CompressReader, DecompressReader};
use rustfs_utils::compress::CompressionAlgorithm;
let data = b"This is test data for compression algorithm testing. ".repeat(1000);
println!("Testing with {} bytes of data", data.len());
@@ -541,7 +552,7 @@ mod tests {
println!("\nTesting algorithm: {:?}", algorithm);
let reader = BufReader::new(Cursor::new(data.clone()));
let reader = Box::new(reader);
let reader = Box::new(WarpReader::new(reader));
let hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap();
// Compress

View File

@@ -1,11 +1,11 @@
mod limit_reader;
use std::io::Cursor;
pub use limit_reader::LimitReader;
mod etag_reader;
pub use etag_reader::EtagReader;
mod compress_index;
mod compress_reader;
pub use compress_reader::{CompressReader, DecompressReader};
@@ -18,21 +18,20 @@ pub use hardlimit_reader::HardLimitReader;
mod hash_reader;
pub use hash_reader::*;
pub mod compress;
pub mod reader;
pub use reader::WarpReader;
mod writer;
use tokio::io::{AsyncRead, BufReader};
pub use writer::*;
mod http_reader;
pub use http_reader::*;
pub use compress_index::TryGetIndex;
mod etag;
pub trait Reader: tokio::io::AsyncRead + Unpin + Send + Sync + EtagResolvable + HashReaderDetector {}
pub trait Reader: tokio::io::AsyncRead + Unpin + Send + Sync + EtagResolvable + HashReaderDetector + TryGetIndex {}
// Trait for types that can be recursively searched for etag capability
pub trait EtagResolvable {
@@ -52,12 +51,6 @@ where
reader.try_resolve_etag()
}
impl<T> EtagResolvable for BufReader<T> where T: AsyncRead + Unpin + Send + Sync {}
impl<T> EtagResolvable for Cursor<T> where T: AsRef<[u8]> + Unpin + Send + Sync {}
impl<T> EtagResolvable for Box<T> where T: EtagResolvable {}
/// Trait to detect and manipulate HashReader instances
pub trait HashReaderDetector {
fn is_hash_reader(&self) -> bool {
@@ -69,41 +62,8 @@ pub trait HashReaderDetector {
}
}
impl<T> HashReaderDetector for tokio::io::BufReader<T> where T: AsyncRead + Unpin + Send + Sync {}
impl<T> HashReaderDetector for std::io::Cursor<T> where T: AsRef<[u8]> + Unpin + Send + Sync {}
impl HashReaderDetector for Box<dyn AsyncRead + Unpin + Send + Sync> {}
impl<T> HashReaderDetector for Box<T> where T: HashReaderDetector {}
// Blanket implementations for Reader trait
impl<T> Reader for tokio::io::BufReader<T> where T: AsyncRead + Unpin + Send + Sync {}
impl<T> Reader for std::io::Cursor<T> where T: AsRef<[u8]> + Unpin + Send + Sync {}
impl<T> Reader for Box<T> where T: Reader {}
// Forward declarations for wrapper types that implement all required traits
impl Reader for crate::HashReader {}
impl Reader for HttpReader {}
impl Reader for crate::HardLimitReader {}
impl Reader for crate::EtagReader {}
impl<R> Reader for crate::EncryptReader<R> where R: Reader {}
impl<R> Reader for crate::DecryptReader<R> where R: Reader {}
impl<R> Reader for crate::CompressReader<R> where R: Reader {}
impl<R> Reader for crate::DecompressReader<R> where R: Reader {}
impl Reader for tokio::fs::File {}
impl HashReaderDetector for tokio::fs::File {}
impl EtagResolvable for tokio::fs::File {}
impl Reader for tokio::io::DuplexStream {}
impl HashReaderDetector for tokio::io::DuplexStream {}
impl EtagResolvable for tokio::io::DuplexStream {}
impl<R> Reader for crate::EncryptReader<R> where R: Reader {}

View File

@@ -9,7 +9,7 @@
//! async fn main() {
//! let data = b"hello world";
//! let reader = BufReader::new(&data[..]);
//! let mut limit_reader = LimitReader::new(reader, data.len() as u64);
//! let mut limit_reader = LimitReader::new(reader, data.len());
//!
//! let mut buf = Vec::new();
//! let n = limit_reader.read_to_end(&mut buf).await.unwrap();
@@ -23,25 +23,25 @@ use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::{AsyncRead, ReadBuf};
use crate::{EtagResolvable, HashReaderDetector, HashReaderMut, Reader};
use crate::{EtagResolvable, HashReaderDetector, HashReaderMut};
pin_project! {
#[derive(Debug)]
pub struct LimitReader<R> {
#[pin]
pub inner: R,
limit: u64,
read: u64,
limit: usize,
read: usize,
}
}
/// A wrapper for AsyncRead that limits the total number of bytes read.
impl<R> LimitReader<R>
where
R: Reader,
R: AsyncRead + Unpin + Send + Sync,
{
/// Create a new LimitReader wrapping `inner`, with a total read limit of `limit` bytes.
pub fn new(inner: R, limit: u64) -> Self {
pub fn new(inner: R, limit: usize) -> Self {
Self { inner, limit, read: 0 }
}
}
@@ -57,7 +57,7 @@ where
return Poll::Ready(Ok(()));
}
let orig_remaining = buf.remaining();
let allowed = remaining.min(orig_remaining as u64) as usize;
let allowed = remaining.min(orig_remaining);
if allowed == 0 {
return Poll::Ready(Ok(()));
}
@@ -66,7 +66,7 @@ where
let poll = this.inner.as_mut().poll_read(cx, buf);
if let Poll::Ready(Ok(())) = &poll {
let n = buf.filled().len() - before_size;
*this.read += n as u64;
*this.read += n;
}
poll
} else {
@@ -76,7 +76,7 @@ where
if let Poll::Ready(Ok(())) = &poll {
let n = temp_buf.filled().len();
buf.put_slice(temp_buf.filled());
*this.read += n as u64;
*this.read += n;
}
poll
}
@@ -115,7 +115,7 @@ mod tests {
async fn test_limit_reader_exact() {
let data = b"hello world";
let reader = BufReader::new(&data[..]);
let mut limit_reader = LimitReader::new(reader, data.len() as u64);
let mut limit_reader = LimitReader::new(reader, data.len());
let mut buf = Vec::new();
let n = limit_reader.read_to_end(&mut buf).await.unwrap();
@@ -176,7 +176,7 @@ mod tests {
let mut data = vec![0u8; size];
rand::rng().fill(&mut data[..]);
let reader = Cursor::new(data.clone());
let mut limit_reader = LimitReader::new(reader, size as u64);
let mut limit_reader = LimitReader::new(reader, size);
// Read data into buffer
let mut buf = Vec::new();

View File

@@ -2,6 +2,7 @@ use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::{AsyncRead, ReadBuf};
use crate::compress_index::TryGetIndex;
use crate::{EtagResolvable, HashReaderDetector, Reader};
pub struct WarpReader<R> {
@@ -24,4 +25,6 @@ impl<R: AsyncRead + Unpin + Send + Sync> HashReaderDetector for WarpReader<R> {}
impl<R: AsyncRead + Unpin + Send + Sync> EtagResolvable for WarpReader<R> {}
impl<R: AsyncRead + Unpin + Send + Sync> TryGetIndex for WarpReader<R> {}
impl<R: AsyncRead + Unpin + Send + Sync> Reader for WarpReader<R> {}
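A note for readers of this hunk: WarpReader is the adapter that lets any plain AsyncRead satisfy the tightened Reader bound (which now also requires TryGetIndex), which is why the updated tests wrap everything in Box::new(WarpReader::new(..)). A minimal sketch, assuming the rustfs_rio exports introduced in this PR:
use rustfs_rio::{HashReader, HashReaderDetector, WarpReader};
use std::io::Cursor;
use tokio::io::BufReader;
fn wrap_any_async_read() {
    let data = b"hello world".to_vec();
    let len = data.len() as i64;
    // WarpReader turns a plain AsyncRead into a rustfs_rio::Reader by supplying
    // the EtagResolvable / HashReaderDetector / TryGetIndex marker impls.
    let reader = Box::new(WarpReader::new(BufReader::new(Cursor::new(data))));
    let hash_reader = HashReader::new(reader, len, len, None, false).unwrap();
    assert!(hash_reader.is_hash_reader());
}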

View File

@@ -29,10 +29,15 @@ tempfile = { workspace = true, optional = true }
tokio = { workspace = true, optional = true, features = ["io-util", "macros"] }
tracing = { workspace = true }
url = { workspace = true , optional = true}
flate2 = { workspace = true , optional = true}
brotli = { workspace = true , optional = true}
zstd = { workspace = true , optional = true}
snap = { workspace = true , optional = true}
lz4 = { workspace = true , optional = true}
[dev-dependencies]
tempfile = { workspace = true }
rand = {workspace = true}
[target.'cfg(windows)'.dependencies]
winapi = { workspace = true, optional = true, features = ["std", "fileapi", "minwindef", "ntdef", "winnt"] }
@@ -47,9 +52,10 @@ tls = ["dep:rustls", "dep:rustls-pemfile", "dep:rustls-pki-types"] # tls charac
net = ["ip","dep:url", "dep:netif", "dep:lazy_static"] # empty network features
io = ["dep:tokio"]
path = []
compress =["dep:flate2","dep:brotli","dep:snap","dep:lz4","dep:zstd"]
string = ["dep:regex","dep:lazy_static"]
crypto = ["dep:base64-simd","dep:hex-simd"]
hash = ["dep:highway", "dep:md-5", "dep:sha2", "dep:blake3", "dep:serde", "dep:siphasher"]
os = ["dep:nix", "dep:tempfile", "winapi"] # operating system utilities
integration = [] # integration test features
full = ["ip", "tls", "net", "io","hash", "os", "integration","path","crypto", "string"] # all features
full = ["ip", "tls", "net", "io","hash", "os", "integration","path","crypto", "string","compress"] # all features

View File

@@ -1,13 +1,13 @@
use http::HeaderMap;
use std::io::Write;
use tokio::io;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum CompressionAlgorithm {
None,
Gzip,
#[default]
Deflate,
Zstd,
#[default]
Lz4,
Brotli,
Snappy,
@@ -16,6 +16,7 @@ pub enum CompressionAlgorithm {
impl CompressionAlgorithm {
pub fn as_str(&self) -> &str {
match self {
CompressionAlgorithm::None => "none",
CompressionAlgorithm::Gzip => "gzip",
CompressionAlgorithm::Deflate => "deflate",
CompressionAlgorithm::Zstd => "zstd",
@@ -42,10 +43,8 @@ impl std::str::FromStr for CompressionAlgorithm {
"lz4" => Ok(CompressionAlgorithm::Lz4),
"brotli" => Ok(CompressionAlgorithm::Brotli),
"snappy" => Ok(CompressionAlgorithm::Snappy),
_ => Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
format!("Unsupported compression algorithm: {}", s),
)),
"none" => Ok(CompressionAlgorithm::None),
_ => Err(std::io::Error::other(format!("Unsupported compression algorithm: {}", s))),
}
}
}
@@ -88,6 +87,7 @@ pub fn compress_block(input: &[u8], algorithm: CompressionAlgorithm) -> Vec<u8>
let _ = encoder.write_all(input);
encoder.into_inner().unwrap_or_default()
}
CompressionAlgorithm::None => input.to_vec(),
}
}
@@ -129,20 +129,15 @@ pub fn decompress_block(compressed: &[u8], algorithm: CompressionAlgorithm) -> i
std::io::Read::read_to_end(&mut decoder, &mut out)?;
Ok(out)
}
CompressionAlgorithm::None => Ok(Vec::new()),
}
}
pub const MIN_COMPRESSIBLE_SIZE: i64 = 4096;
pub fn is_compressible(_headers: &HeaderMap) -> bool {
// TODO: Implement this function
false
}
#[cfg(test)]
mod tests {
use super::*;
use std::str::FromStr;
use std::time::Instant;
#[test]
fn test_compress_decompress_gzip() {
@@ -267,4 +262,57 @@ mod tests {
&& !snappy.is_empty()
);
}
#[test]
fn test_compression_benchmark() {
let sizes = [128 * 1024, 512 * 1024, 1024 * 1024];
let algorithms = [
CompressionAlgorithm::Gzip,
CompressionAlgorithm::Deflate,
CompressionAlgorithm::Zstd,
CompressionAlgorithm::Lz4,
CompressionAlgorithm::Brotli,
CompressionAlgorithm::Snappy,
];
println!("\n压缩算法基准测试结果:");
println!(
"{:<10} {:<10} {:<15} {:<15} {:<15}",
"数据大小", "算法", "压缩时间(ms)", "压缩后大小", "压缩率"
);
for size in sizes {
// Generate compressible data (a repeated text pattern)
let pattern = b"Hello, this is a test pattern that will be repeated multiple times to create compressible data. ";
let data: Vec<u8> = pattern.iter().cycle().take(size).copied().collect();
for algo in algorithms {
// Compression pass
let start = Instant::now();
let compressed = compress_block(&data, algo);
let compress_time = start.elapsed();
// Decompression pass
let start = Instant::now();
let _decompressed = decompress_block(&compressed, algo).unwrap();
let _decompress_time = start.elapsed();
// Compute the compression ratio
let compression_ratio = (size as f64 / compressed.len() as f64) as f32;
println!(
"{:<10} {:<10} {:<15.2} {:<15} {:<15.2}x",
format!("{}KB", size / 1024),
algo.as_str(),
compress_time.as_secs_f64() * 1000.0,
compressed.len(),
compression_ratio
);
// Verify the decompressed output matches the input
assert_eq!(_decompressed, data);
}
println!(); // Blank line to separate results for different sizes
}
}
}
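For context, a hedged round-trip sketch of the block helpers as this hunk defines them (compress_block returning Vec<u8>, decompress_block returning an io::Result, FromStr now accepting "none"); the function name is illustrative only:
use rustfs_utils::compress::{CompressionAlgorithm, compress_block, decompress_block};
use std::str::FromStr;
fn round_trip() -> std::io::Result<()> {
    // Pick the algorithm from a user-facing string; unsupported names are io errors.
    let algo = CompressionAlgorithm::from_str("zstd").expect("supported algorithm name");
    let input = b"Hello, World! This pattern repeats. ".repeat(1024);
    let compressed = compress_block(&input, algo);
    let restored = decompress_block(&compressed, algo)?;
    assert_eq!(restored, input);
    assert!(compressed.len() < input.len(), "repetitive data should shrink");
    Ok(())
}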

View File

@@ -25,6 +25,9 @@ pub mod string;
#[cfg(feature = "crypto")]
pub mod crypto;
#[cfg(feature = "compress")]
pub mod compress;
#[cfg(feature = "tls")]
pub use certs::*;
#[cfg(feature = "hash")]
@@ -36,3 +39,6 @@ pub use ip::*;
#[cfg(feature = "crypto")]
pub use crypto::*;
#[cfg(feature = "compress")]
pub use compress::*;

View File

@@ -32,6 +32,29 @@ pub fn match_pattern(pattern: &str, name: &str) -> bool {
deep_match_rune(name.as_bytes(), pattern.as_bytes(), false)
}
pub fn has_pattern(patterns: &[&str], match_str: &str) -> bool {
for pattern in patterns {
if match_simple(pattern, match_str) {
return true;
}
}
false
}
pub fn has_string_suffix_in_slice(str: &str, list: &[&str]) -> bool {
let str = str.to_lowercase();
for v in list {
if *v == "*" {
return true;
}
if str.ends_with(&v.to_lowercase()) {
return true;
}
}
false
}
fn deep_match_rune(str_: &[u8], pattern: &[u8], simple: bool) -> bool {
let (mut str_, mut pattern) = (str_, pattern);
while !pattern.is_empty() {

View File
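These two helpers are the matching primitives behind the new compression gate in ecstore. A small usage sketch, assuming match_simple treats "*" as a wildcard and that the suffix check is case-insensitive as written above:
use rustfs_utils::string::{has_pattern, has_string_suffix_in_slice};
fn gate_examples() {
    // Wildcard content-type patterns, as used by the exclude list.
    assert!(has_pattern(&["video/*", "application/zip"], "video/mp4"));
    assert!(!has_pattern(&["video/*"], "text/plain"));
    // Case-insensitive suffix check for object extensions; "*" matches everything.
    assert!(has_string_suffix_in_slice("backup.TAR.GZ", &[".gz", ".zip"]));
    assert!(!has_string_suffix_in_slice("notes.txt", &[".gz", ".zip"]));
}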

@@ -91,6 +91,7 @@ winapi = { workspace = true }
[dev-dependencies]
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
criterion = { version = "0.5", features = ["html_reports"] }
temp-env = "0.2.0"
[build-dependencies]
shadow-rs = { workspace = true, features = ["build", "metadata"] }

View File

@@ -68,14 +68,20 @@ pub async fn create_bitrot_writer(
disk: Option<&DiskStore>,
volume: &str,
path: &str,
length: usize,
length: i64,
shard_size: usize,
checksum_algo: HashAlgorithm,
) -> disk::error::Result<BitrotWriterWrapper> {
let writer = if is_inline_buffer {
CustomWriter::new_inline_buffer()
} else if let Some(disk) = disk {
let length = length.div_ceil(shard_size) * checksum_algo.size() + length;
let length = if length > 0 {
let length = length as usize;
(length.div_ceil(shard_size) * checksum_algo.size() + length) as i64
} else {
0
};
let file = disk.create_file("", volume, path, length).await?;
CustomWriter::new_tokio_writer(file)
} else {

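The preallocated length above now accounts for one checksum per shard ahead of the payload. A worked sketch of that formula with hypothetical numbers, assuming HighwayHash256 reports a 32-byte checksum from HashAlgorithm::size():
// Bitrot layout: each `shard_size` chunk is preceded by a checksum, so the
// on-disk size is ceil(length / shard_size) * checksum_size + length.
fn bitrot_file_size(length: i64, shard_size: usize, checksum_size: usize) -> i64 {
    if length <= 0 {
        return 0; // unknown or empty stream: nothing to preallocate
    }
    let length = length as usize;
    (length.div_ceil(shard_size) * checksum_size + length) as i64
}
fn main() {
    // A 1 MiB part with 64 KiB shards and 32-byte checksums:
    // 16 shards * 32 bytes + 1 MiB = 1_049_088 bytes.
    assert_eq!(bitrot_file_size(1 << 20, 64 * 1024, 32), 1_049_088);
}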
View File

@@ -443,7 +443,6 @@ impl BucketMetadataSys {
let bm = match self.get_config(bucket).await {
Ok((res, _)) => res,
Err(err) => {
warn!("get_object_lock_config err {:?}", &err);
return if err == Error::ConfigNotFound {
Err(BucketMetadataError::BucketObjectLockConfigNotFound.into())
} else {

View File

@@ -511,8 +511,8 @@ pub async fn get_heal_replicate_object_info(
let mut result = ReplicateObjectInfo {
name: oi.name.clone(),
size: oi.size as i64,
actual_size: asz as i64,
size: oi.size,
actual_size: asz,
bucket: oi.bucket.clone(),
//version_id: oi.version_id.clone(),
version_id: oi
@@ -814,8 +814,8 @@ impl ReplicationPool {
vsender.pop(); // Dropping the sender will close the channel
}
self.workers_sender = vsender;
warn!("self sender size is {:?}", self.workers_sender.len());
warn!("self sender size is {:?}", self.workers_sender.len());
// warn!("self sender size is {:?}", self.workers_sender.len());
// warn!("self sender size is {:?}", self.workers_sender.len());
}
async fn resize_failed_workers(&self, _count: usize) {
@@ -1758,13 +1758,13 @@ pub async fn schedule_replication(oi: ObjectInfo, o: Arc<store::ECStore>, dsc: R
let replication_timestamp = Utc::now(); // Placeholder for timestamp parsing
let replication_state = oi.replication_state();
let actual_size = oi.actual_size.unwrap_or(0);
let actual_size = oi.actual_size;
//let ssec = oi.user_defined.contains_key("ssec");
let ssec = false;
let ri = ReplicateObjectInfo {
name: oi.name,
size: oi.size as i64,
size: oi.size,
bucket: oi.bucket,
version_id: oi
.version_id
@@ -2018,8 +2018,8 @@ impl ReplicateObjectInfo {
mod_time: Some(
OffsetDateTime::from_unix_timestamp(self.mod_time.timestamp()).unwrap_or_else(|_| OffsetDateTime::now_utc()),
),
size: self.size as usize,
actual_size: Some(self.actual_size as usize),
size: self.size,
actual_size: self.actual_size,
is_dir: false,
user_defined: None, // can be filled in from elsewhere as needed
parity_blocks: 0,
@@ -2317,7 +2317,7 @@ impl ReplicateObjectInfo {
// Set the object size
//rinfo.size = object_info.actual_size.unwrap_or(0);
rinfo.size = object_info.actual_size.map_or(0, |v| v as i64);
rinfo.size = object_info.actual_size;
//rinfo.replication_action = object_info.
rinfo.replication_status = ReplicationStatusType::Completed;

115
ecstore/src/compress.rs Normal file
View File

@@ -0,0 +1,115 @@
use rustfs_utils::string::has_pattern;
use rustfs_utils::string::has_string_suffix_in_slice;
use std::env;
use tracing::error;
pub const MIN_COMPRESSIBLE_SIZE: usize = 4096;
// Name of the environment variable that controls whether compression is enabled
pub const ENV_COMPRESSION_ENABLED: &str = "RUSTFS_COMPRESSION_ENABLED";
// Some standard object extensions which we strictly dis-allow for compression.
pub const STANDARD_EXCLUDE_COMPRESS_EXTENSIONS: &[&str] = &[
".gz", ".bz2", ".rar", ".zip", ".7z", ".xz", ".mp4", ".mkv", ".mov", ".jpg", ".png", ".gif",
];
// Some standard content-types which we strictly dis-allow for compression.
pub const STANDARD_EXCLUDE_COMPRESS_CONTENT_TYPES: &[&str] = &[
"video/*",
"audio/*",
"application/zip",
"application/x-gzip",
"application/x-zip-compressed",
"application/x-compress",
"application/x-spoon",
];
pub fn is_compressible(headers: &http::HeaderMap, object_name: &str) -> bool {
// Check whether compression is enabled via the environment variable; disabled by default
if let Ok(compression_enabled) = env::var(ENV_COMPRESSION_ENABLED) {
if compression_enabled.to_lowercase() != "true" {
error!("Compression is disabled by environment variable");
return false;
}
} else {
// Disabled by default when the environment variable is not set
return false;
}
let content_type = headers.get("content-type").and_then(|s| s.to_str().ok()).unwrap_or("");
// TODO: crypto request return false
if has_string_suffix_in_slice(object_name, STANDARD_EXCLUDE_COMPRESS_EXTENSIONS) {
error!("object_name: {} is not compressible", object_name);
return false;
}
if !content_type.is_empty() && has_pattern(STANDARD_EXCLUDE_COMPRESS_CONTENT_TYPES, content_type) {
error!("content_type: {} is not compressible", content_type);
return false;
}
true
// TODO: check from config
}
#[cfg(test)]
mod tests {
use super::*;
use temp_env;
#[test]
fn test_is_compressible() {
use http::HeaderMap;
let headers = HeaderMap::new();
// Test the environment variable gate
temp_env::with_var(ENV_COMPRESSION_ENABLED, Some("false"), || {
assert!(!is_compressible(&headers, "file.txt"));
});
temp_env::with_var(ENV_COMPRESSION_ENABLED, Some("true"), || {
assert!(is_compressible(&headers, "file.txt"));
});
temp_env::with_var_unset(ENV_COMPRESSION_ENABLED, || {
assert!(!is_compressible(&headers, "file.txt"));
});
temp_env::with_var(ENV_COMPRESSION_ENABLED, Some("true"), || {
let mut headers = HeaderMap::new();
// Test extensions that must not be compressed
headers.insert("content-type", "text/plain".parse().unwrap());
assert!(!is_compressible(&headers, "file.gz"));
assert!(!is_compressible(&headers, "file.zip"));
assert!(!is_compressible(&headers, "file.mp4"));
assert!(!is_compressible(&headers, "file.jpg"));
// Test content types that must not be compressed
headers.insert("content-type", "video/mp4".parse().unwrap());
assert!(!is_compressible(&headers, "file.txt"));
headers.insert("content-type", "audio/mpeg".parse().unwrap());
assert!(!is_compressible(&headers, "file.txt"));
headers.insert("content-type", "application/zip".parse().unwrap());
assert!(!is_compressible(&headers, "file.txt"));
headers.insert("content-type", "application/x-gzip".parse().unwrap());
assert!(!is_compressible(&headers, "file.txt"));
// Test cases that should be compressible
headers.insert("content-type", "text/plain".parse().unwrap());
assert!(is_compressible(&headers, "file.txt"));
assert!(is_compressible(&headers, "file.log"));
headers.insert("content-type", "text/html".parse().unwrap());
assert!(is_compressible(&headers, "file.html"));
headers.insert("content-type", "application/json".parse().unwrap());
assert!(is_compressible(&headers, "file.json"));
});
}
}
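To illustrate how this gate might be consulted on the write path from inside the ecstore crate, here is a hypothetical helper (not part of the PR) that combines it with the MIN_COMPRESSIBLE_SIZE floor; the returned algorithm is only a placeholder:
use crate::compress::{MIN_COMPRESSIBLE_SIZE, is_compressible};
use rustfs_utils::compress::CompressionAlgorithm;
// Hypothetical helper: decide whether an incoming PUT should be routed through
// a CompressReader, based on the environment gate plus the size floor.
fn should_compress(headers: &http::HeaderMap, object: &str, size: i64) -> Option<CompressionAlgorithm> {
    if size >= 0 && (size as usize) < MIN_COMPRESSIBLE_SIZE {
        return None; // tiny or empty objects are not worth compressing
    }
    if !is_compressible(headers, object) {
        return None; // gate disabled, or excluded extension / content type
    }
    Some(CompressionAlgorithm::default())
}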

View File

@@ -93,17 +93,11 @@ pub async fn delete_config<S: StorageAPI>(api: Arc<S>, file: &str) -> Result<()>
}
pub async fn save_config_with_opts<S: StorageAPI>(api: Arc<S>, file: &str, data: Vec<u8>, opts: &ObjectOptions) -> Result<()> {
warn!(
"save_config_with_opts, bucket: {}, file: {}, data len: {}",
RUSTFS_META_BUCKET,
file,
data.len()
);
if let Err(err) = api
.put_object(RUSTFS_META_BUCKET, file, &mut PutObjReader::from_vec(data), opts)
.await
{
warn!("save_config_with_opts: err: {:?}, file: {}", err, file);
error!("save_config_with_opts: err: {:?}, file: {}", err, file);
return Err(err);
}
Ok(())

View File

@@ -112,7 +112,13 @@ impl Config {
}
}
pub fn should_inline(&self, shard_size: usize, versioned: bool) -> bool {
pub fn should_inline(&self, shard_size: i64, versioned: bool) -> bool {
if shard_size < 0 {
return false;
}
let shard_size = shard_size as usize;
let mut inline_block = DEFAULT_INLINE_BLOCK;
if self.initialized {
inline_block = self.inline_block;

View File

@@ -773,7 +773,7 @@ impl LocalDisk {
Ok(res) => res,
Err(e) => {
if e != DiskError::VolumeNotFound && e != Error::FileNotFound {
warn!("scan list_dir {}, err {:?}", &current, &e);
debug!("scan list_dir {}, err {:?}", &current, &e);
}
if opts.report_notfound && e == Error::FileNotFound && current == &opts.base_dir {
@@ -785,7 +785,6 @@ impl LocalDisk {
};
if entries.is_empty() {
warn!("scan list_dir {}, entries is empty", &current);
return Ok(());
}
@@ -801,7 +800,6 @@ impl LocalDisk {
let entry = item.clone();
// check limit
if opts.limit > 0 && *objs_returned >= opts.limit {
warn!("scan list_dir {}, limit reached", &current);
return Ok(());
}
// check prefix
@@ -1207,7 +1205,7 @@ impl DiskAPI for LocalDisk {
let err = self
.bitrot_verify(
&part_path,
erasure.shard_file_size(part.size),
erasure.shard_file_size(part.size as i64) as usize,
checksum_info.algorithm,
&checksum_info.hash,
erasure.shard_size(),
@@ -1248,7 +1246,7 @@ impl DiskAPI for LocalDisk {
resp.results[i] = CHECK_PART_FILE_NOT_FOUND;
continue;
}
if (st.len() as usize) < fi.erasure.shard_file_size(part.size) {
if (st.len() as i64) < fi.erasure.shard_file_size(part.size as i64) {
resp.results[i] = CHECK_PART_FILE_CORRUPT;
continue;
}
@@ -1400,7 +1398,7 @@ impl DiskAPI for LocalDisk {
}
#[tracing::instrument(level = "debug", skip(self))]
async fn create_file(&self, origvolume: &str, volume: &str, path: &str, _file_size: usize) -> Result<FileWriter> {
async fn create_file(&self, origvolume: &str, volume: &str, path: &str, _file_size: i64) -> Result<FileWriter> {
// warn!("disk create_file: origvolume: {}, volume: {}, path: {}", origvolume, volume, path);
if !origvolume.is_empty() {
@@ -1574,11 +1572,6 @@ impl DiskAPI for LocalDisk {
let mut current = opts.base_dir.clone();
self.scan_dir(&mut current, &opts, &mut out, &mut objs_returned).await?;
warn!(
"walk_dir: done, volume_dir: {:?}, base_dir: {}",
volume_dir.to_string_lossy(),
opts.base_dir
);
Ok(())
}
@@ -2239,7 +2232,7 @@ impl DiskAPI for LocalDisk {
let mut obj_deleted = false;
for info in obj_infos.iter() {
let done = ScannerMetrics::time(ScannerMetric::ApplyVersion);
let sz: usize;
let sz: i64;
(obj_deleted, sz) = item.apply_actions(info, &mut size_s).await;
done();
@@ -2260,7 +2253,7 @@ impl DiskAPI for LocalDisk {
size_s.versions += 1;
}
size_s.total_size += sz;
size_s.total_size += sz as usize;
if info.delete_marker {
continue;

View File

@@ -304,7 +304,7 @@ impl DiskAPI for Disk {
}
#[tracing::instrument(skip(self))]
async fn create_file(&self, _origvolume: &str, volume: &str, path: &str, _file_size: usize) -> Result<FileWriter> {
async fn create_file(&self, _origvolume: &str, volume: &str, path: &str, _file_size: i64) -> Result<FileWriter> {
match self {
Disk::Local(local_disk) => local_disk.create_file(_origvolume, volume, path, _file_size).await,
Disk::Remote(remote_disk) => remote_disk.create_file(_origvolume, volume, path, _file_size).await,
@@ -491,7 +491,7 @@ pub trait DiskAPI: Debug + Send + Sync + 'static {
async fn read_file(&self, volume: &str, path: &str) -> Result<FileReader>;
async fn read_file_stream(&self, volume: &str, path: &str, offset: usize, length: usize) -> Result<FileReader>;
async fn append_file(&self, volume: &str, path: &str) -> Result<FileWriter>;
async fn create_file(&self, origvolume: &str, volume: &str, path: &str, file_size: usize) -> Result<FileWriter>;
async fn create_file(&self, origvolume: &str, volume: &str, path: &str, file_size: i64) -> Result<FileWriter>;
// ReadFileStream
async fn rename_file(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str) -> Result<()>;
async fn rename_part(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str, meta: Bytes) -> Result<()>;

View File

@@ -640,7 +640,7 @@ impl DiskAPI for RemoteDisk {
}
#[tracing::instrument(level = "debug", skip(self))]
async fn create_file(&self, _origvolume: &str, volume: &str, path: &str, file_size: usize) -> Result<FileWriter> {
async fn create_file(&self, _origvolume: &str, volume: &str, path: &str, file_size: i64) -> Result<FileWriter> {
info!("create_file {}/{}/{}", self.endpoint.to_string(), volume, path);
let url = format!(

View File

@@ -30,7 +30,7 @@ where
// Callers should resolve disk errors before passing readers in, so that enough usable BitrotReaders are available
pub fn new(readers: Vec<Option<BitrotReader<R>>>, e: Erasure, offset: usize, total_length: usize) -> Self {
let shard_size = e.shard_size();
let shard_file_size = e.shard_file_size(total_length);
let shard_file_size = e.shard_file_size(total_length as i64) as usize;
let offset = (offset / e.block_size) * shard_size;
@@ -142,6 +142,7 @@ where
W: tokio::io::AsyncWrite + Send + Sync + Unpin,
{
if get_data_block_len(en_blocks, data_blocks) < length {
error!("write_data_blocks get_data_block_len < length");
return Err(io::Error::new(ErrorKind::UnexpectedEof, "Not enough data blocks to write"));
}
@@ -150,6 +151,7 @@ where
for block_op in &en_blocks[..data_blocks] {
if block_op.is_none() {
error!("write_data_blocks block_op.is_none()");
return Err(io::Error::new(ErrorKind::UnexpectedEof, "Missing data block"));
}
@@ -164,7 +166,10 @@ where
offset = 0;
if write_left < block.len() {
writer.write_all(&block_slice[..write_left]).await?;
writer.write_all(&block_slice[..write_left]).await.map_err(|e| {
error!("write_data_blocks write_all err: {}", e);
e
})?;
total_written += write_left;
break;
@@ -172,7 +177,10 @@ where
let n = block_slice.len();
writer.write_all(block_slice).await?;
writer.write_all(block_slice).await.map_err(|e| {
error!("write_data_blocks write_all2 err: {}", e);
e
})?;
write_left -= n;
@@ -228,6 +236,7 @@ impl Erasure {
};
if block_length == 0 {
// error!("erasure decode decode block_length == 0");
break;
}

View File

@@ -469,22 +469,27 @@ impl Erasure {
}
/// Calculate the total erasure file size for a given original size.
// Returns the final erasure size from the original size
pub fn shard_file_size(&self, total_length: usize) -> usize {
pub fn shard_file_size(&self, total_length: i64) -> i64 {
if total_length == 0 {
return 0;
}
if total_length < 0 {
return total_length;
}
let total_length = total_length as usize;
let num_shards = total_length / self.block_size;
let last_block_size = total_length % self.block_size;
let last_shard_size = calc_shard_size(last_block_size, self.data_shards);
num_shards * self.shard_size() + last_shard_size
(num_shards * self.shard_size() + last_shard_size) as i64
}
/// Calculate the offset in the erasure file where reading begins.
// Returns the offset in the erasure file where reading begins
pub fn shard_file_offset(&self, start_offset: usize, length: usize, total_length: usize) -> usize {
let shard_size = self.shard_size();
let shard_file_size = self.shard_file_size(total_length);
let shard_file_size = self.shard_file_size(total_length as i64) as usize;
let end_shard = (start_offset + length) / self.block_size;
let mut till_offset = end_shard * shard_size + shard_size;
if till_offset > shard_file_size {

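A standalone sketch of the signed size math above, with hypothetical parameters and assuming calc_shard_size is ceiling division (consistent with shard_size()):
// Mirrors the hunk above: negative (unknown) lengths pass through unchanged,
// full blocks contribute shard_size each, and the remainder contributes
// ceil(remainder / data_shards).
fn shard_file_size(total_length: i64, block_size: usize, data_shards: usize, shard_size: usize) -> i64 {
    if total_length == 0 {
        return 0;
    }
    if total_length < 0 {
        return total_length;
    }
    let total_length = total_length as usize;
    let num_shards = total_length / block_size;
    let last_shard = (total_length % block_size).div_ceil(data_shards);
    (num_shards * shard_size + last_shard) as i64
}
fn main() {
    // e.g. 4 data shards, 1 MiB blocks => shard_size = 256 KiB.
    let (block, data, shard) = (1 << 20, 4, 256 * 1024);
    assert_eq!(shard_file_size(3 << 20, block, data, shard), 3 * 256 * 1024);
    assert_eq!(shard_file_size((3 << 20) + 100, block, data, shard), 3 * 256 * 1024 + 25);
    assert_eq!(shard_file_size(-1, block, data, shard), -1); // unknown length
}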
View File

@@ -526,7 +526,7 @@ impl ScannerItem {
cumulative_size += obj_info.size;
}
if cumulative_size >= SCANNER_EXCESS_OBJECT_VERSIONS_TOTAL_SIZE.load(Ordering::SeqCst) as usize {
if cumulative_size >= SCANNER_EXCESS_OBJECT_VERSIONS_TOTAL_SIZE.load(Ordering::SeqCst) as i64 {
//todo
}
@@ -558,7 +558,7 @@ impl ScannerItem {
Ok(object_infos)
}
pub async fn apply_actions(&mut self, oi: &ObjectInfo, _size_s: &mut SizeSummary) -> (bool, usize) {
pub async fn apply_actions(&mut self, oi: &ObjectInfo, _size_s: &mut SizeSummary) -> (bool, i64) {
let done = ScannerMetrics::time(ScannerMetric::Ilm);
//todo: lifecycle
info!(
@@ -641,21 +641,21 @@ impl ScannerItem {
match tgt_status {
ReplicationStatusType::Pending => {
tgt_size_s.pending_count += 1;
tgt_size_s.pending_size += oi.size;
tgt_size_s.pending_size += oi.size as usize;
size_s.pending_count += 1;
size_s.pending_size += oi.size;
size_s.pending_size += oi.size as usize;
}
ReplicationStatusType::Failed => {
tgt_size_s.failed_count += 1;
tgt_size_s.failed_size += oi.size;
tgt_size_s.failed_size += oi.size as usize;
size_s.failed_count += 1;
size_s.failed_size += oi.size;
size_s.failed_size += oi.size as usize;
}
ReplicationStatusType::Completed | ReplicationStatusType::CompletedLegacy => {
tgt_size_s.replicated_count += 1;
tgt_size_s.replicated_size += oi.size;
tgt_size_s.replicated_size += oi.size as usize;
size_s.replicated_count += 1;
size_s.replicated_size += oi.size;
size_s.replicated_size += oi.size as usize;
}
_ => {}
}
@@ -663,7 +663,7 @@ impl ScannerItem {
if matches!(oi.replication_status, ReplicationStatusType::Replica) {
size_s.replica_count += 1;
size_s.replica_size += oi.size;
size_s.replica_size += oi.size as usize;
}
}
}

View File

@@ -4,6 +4,7 @@ pub mod bucket;
pub mod cache_value;
mod chunk_stream;
pub mod cmd;
pub mod compress;
pub mod config;
pub mod disk;
pub mod disks_layout;

View File

@@ -24,7 +24,7 @@ use futures::future::BoxFuture;
use http::HeaderMap;
use rmp_serde::{Deserializer, Serializer};
use rustfs_filemeta::{MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams};
use rustfs_rio::HashReader;
use rustfs_rio::{HashReader, WarpReader};
use rustfs_utils::path::{SLASH_SEPARATOR, encode_dir_object, path_join};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
@@ -33,7 +33,7 @@ use std::io::{Cursor, Write};
use std::path::PathBuf;
use std::sync::Arc;
use time::{Duration, OffsetDateTime};
use tokio::io::AsyncReadExt;
use tokio::io::{AsyncReadExt, BufReader};
use tokio::sync::broadcast::Receiver as B_Receiver;
use tracing::{error, info, warn};
@@ -1254,6 +1254,7 @@ impl ECStore {
}
if let Err(err) = self
.clone()
.complete_multipart_upload(
&bucket,
&object_info.name,
@@ -1275,10 +1276,9 @@ impl ECStore {
return Ok(());
}
let mut data = PutObjReader::new(
HashReader::new(rd.stream, object_info.size as i64, object_info.size as i64, None, false)?,
object_info.size,
);
let reader = BufReader::new(rd.stream);
let hrd = HashReader::new(Box::new(WarpReader::new(reader)), object_info.size, object_info.size, None, false)?;
let mut data = PutObjReader::new(hrd);
if let Err(err) = self
.put_object(

View File

@@ -12,13 +12,13 @@ use crate::store_api::{CompletePart, GetObjectReader, ObjectIO, ObjectOptions, P
use common::defer;
use http::HeaderMap;
use rustfs_filemeta::{FileInfo, MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams};
use rustfs_rio::HashReader;
use rustfs_rio::{HashReader, WarpReader};
use rustfs_utils::path::encode_dir_object;
use serde::{Deserialize, Serialize};
use std::io::Cursor;
use std::sync::Arc;
use time::OffsetDateTime;
use tokio::io::AsyncReadExt;
use tokio::io::{AsyncReadExt, BufReader};
use tokio::sync::broadcast::{self, Receiver as B_Receiver};
use tokio::time::{Duration, Instant};
use tracing::{error, info, warn};
@@ -62,7 +62,7 @@ impl RebalanceStats {
self.num_versions += 1;
let on_disk_size = if !fi.deleted {
fi.size as i64 * (fi.erasure.data_blocks + fi.erasure.parity_blocks) as i64 / fi.erasure.data_blocks as i64
fi.size * (fi.erasure.data_blocks + fi.erasure.parity_blocks) as i64 / fi.erasure.data_blocks as i64
} else {
0
};
@@ -703,7 +703,7 @@ impl ECStore {
#[allow(unused_assignments)]
#[tracing::instrument(skip(self, set))]
async fn rebalance_entry(
&self,
self: Arc<Self>,
bucket: String,
pool_index: usize,
entry: MetaCacheEntry,
@@ -834,7 +834,7 @@ impl ECStore {
}
};
if let Err(err) = self.rebalance_object(pool_index, bucket.clone(), rd).await {
if let Err(err) = self.clone().rebalance_object(pool_index, bucket.clone(), rd).await {
if is_err_object_not_found(&err) || is_err_version_not_found(&err) || is_err_data_movement_overwrite(&err) {
ignore = true;
warn!("rebalance_entry {} Entry {} is already deleted, skipping", &bucket, version.name);
@@ -890,7 +890,7 @@ impl ECStore {
}
#[tracing::instrument(skip(self, rd))]
async fn rebalance_object(&self, pool_idx: usize, bucket: String, rd: GetObjectReader) -> Result<()> {
async fn rebalance_object(self: Arc<Self>, pool_idx: usize, bucket: String, rd: GetObjectReader) -> Result<()> {
let object_info = rd.object_info.clone();
// TODO: check: use size or actual_size?
@@ -969,6 +969,7 @@ impl ECStore {
}
if let Err(err) = self
.clone()
.complete_multipart_upload(
&bucket,
&object_info.name,
@@ -989,8 +990,9 @@ impl ECStore {
return Ok(());
}
let hrd = HashReader::new(rd.stream, object_info.size as i64, object_info.size as i64, None, false)?;
let mut data = PutObjReader::new(hrd, object_info.size);
let reader = BufReader::new(rd.stream);
let hrd = HashReader::new(Box::new(WarpReader::new(reader)), object_info.size, object_info.size, None, false)?;
let mut data = PutObjReader::new(hrd);
if let Err(err) = self
.put_object(

View File

@@ -55,13 +55,14 @@ use lock::{LockApi, namespace_lock::NsLockMap};
use madmin::heal_commands::{HealDriveInfo, HealResultItem};
use md5::{Digest as Md5Digest, Md5};
use rand::{Rng, seq::SliceRandom};
use rustfs_filemeta::headers::RESERVED_METADATA_PREFIX_LOWER;
use rustfs_filemeta::{
FileInfo, FileMeta, FileMetaShallowVersion, MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams, ObjectPartInfo,
RawFileInfo, file_info_from_raw,
headers::{AMZ_OBJECT_TAGGING, AMZ_STORAGE_CLASS},
merge_file_meta_versions,
};
use rustfs_rio::{EtagResolvable, HashReader};
use rustfs_rio::{EtagResolvable, HashReader, TryGetIndex as _, WarpReader};
use rustfs_utils::{
HashAlgorithm,
crypto::{base64_decode, base64_encode, hex},
@@ -860,7 +861,8 @@ impl SetDisks {
};
if let Some(err) = reduce_read_quorum_errs(errs, OBJECT_OP_IGNORED_ERRS, expected_rquorum) {
error!("object_quorum_from_meta: {:?}, errs={:?}", err, errs);
// let object = parts_metadata.first().map(|v| v.name.clone()).unwrap_or_default();
// error!("object_quorum_from_meta: {:?}, errs={:?}, object={:?}", err, errs, object);
return Err(err);
}
@@ -1773,7 +1775,7 @@ impl SetDisks {
{
Ok(v) => v,
Err(e) => {
error!("Self::object_quorum_from_meta: {:?}, bucket: {}, object: {}", &e, bucket, object);
// error!("Self::object_quorum_from_meta: {:?}, bucket: {}, object: {}", &e, bucket, object);
return Err(e);
}
};
@@ -1817,7 +1819,7 @@ impl SetDisks {
bucket: &str,
object: &str,
offset: usize,
length: usize,
length: i64,
writer: &mut W,
fi: FileInfo,
files: Vec<FileInfo>,
@@ -1830,11 +1832,16 @@ impl SetDisks {
{
let (disks, files) = Self::shuffle_disks_and_parts_metadata_by_index(disks, &files, &fi);
let total_size = fi.size;
let total_size = fi.size as usize;
let length = { if length == 0 { total_size - offset } else { length } };
let length = if length < 0 {
fi.size as usize - offset
} else {
length as usize
};
if offset > total_size || offset + length > total_size {
error!("get_object_with_fileinfo offset out of range: {}, total_size: {}", offset, total_size);
return Err(Error::other("offset out of range"));
}
@@ -1852,11 +1859,6 @@ impl SetDisks {
let (last_part_index, _) = fi.to_part_offset(end_offset)?;
// debug!(
// "get_object_with_fileinfo end offset:{}, last_part_index:{},part_offset:{}",
// end_offset, last_part_index, 0
// );
// let erasure = Erasure::new(fi.erasure.data_blocks, fi.erasure.parity_blocks, fi.erasure.block_size);
let erasure = erasure_coding::Erasure::new(fi.erasure.data_blocks, fi.erasure.parity_blocks, fi.erasure.block_size);
@@ -1870,7 +1872,7 @@ impl SetDisks {
let part_number = fi.parts[i].number;
let part_size = fi.parts[i].size;
let mut part_length = part_size - part_offset;
if part_length > length - total_readed {
if part_length > (length - total_readed) {
part_length = length - total_readed
}
@@ -1912,7 +1914,7 @@ impl SetDisks {
error!("create_bitrot_reader reduce_read_quorum_errs {:?}", &errors);
return Err(to_object_err(read_err.into(), vec![bucket, object]));
}
error!("create_bitrot_reader not enough disks to read: {:?}", &errors);
return Err(Error::other(format!("not enough disks to read: {:?}", errors)));
}
@@ -2259,7 +2261,8 @@ impl SetDisks {
erasure_coding::Erasure::default()
};
result.object_size = ObjectInfo::from_file_info(&lastest_meta, bucket, object, true).get_actual_size()?;
result.object_size =
ObjectInfo::from_file_info(&lastest_meta, bucket, object, true).get_actual_size()? as usize;
// Loop to find number of disks with valid data, per-drive
// data state and a list of outdated disks on which data needs
// to be healed.
@@ -2521,7 +2524,7 @@ impl SetDisks {
disk.as_ref(),
RUSTFS_META_TMP_BUCKET,
&format!("{}/{}/part.{}", tmp_id, dst_data_dir, part.number),
erasure.shard_file_size(part.size),
erasure.shard_file_size(part.size as i64),
erasure.shard_size(),
HashAlgorithm::HighwayHash256,
)
@@ -2603,6 +2606,7 @@ impl SetDisks {
part.size,
part.mod_time,
part.actual_size,
part.index.clone(),
);
if is_inline_buffer {
if let Some(writer) = writers[index].take() {
@@ -2834,7 +2838,7 @@ impl SetDisks {
heal_item_type: HEAL_ITEM_OBJECT.to_string(),
bucket: bucket.to_string(),
object: object.to_string(),
object_size: lfi.size,
object_size: lfi.size as usize,
version_id: version_id.to_string(),
disk_count: disk_len,
..Default::default()
@@ -3500,7 +3504,7 @@ impl SetDisks {
if let (Some(started), Some(mod_time)) = (started, version.mod_time) {
if mod_time > started {
version_not_found += 1;
if send(heal_entry_skipped(version.size)).await {
if send(heal_entry_skipped(version.size as usize)).await {
defer.await;
return;
}
@@ -3544,10 +3548,10 @@ impl SetDisks {
if version_healed {
bg_seq.count_healed(HEAL_ITEM_OBJECT.to_string()).await;
result = heal_entry_success(version.size);
result = heal_entry_success(version.size as usize);
} else {
bg_seq.count_failed(HEAL_ITEM_OBJECT.to_string()).await;
result = heal_entry_failure(version.size);
result = heal_entry_failure(version.size as usize);
match version.version_id {
Some(version_id) => {
info!("unable to heal object {}/{}-v({})", bucket, version.name, version_id);
@@ -3863,7 +3867,7 @@ impl ObjectIO for SetDisks {
let is_inline_buffer = {
if let Some(sc) = GLOBAL_StorageClass.get() {
sc.should_inline(erasure.shard_file_size(data.content_length), opts.versioned)
sc.should_inline(erasure.shard_file_size(data.size()), opts.versioned)
} else {
false
}
@@ -3878,7 +3882,7 @@ impl ObjectIO for SetDisks {
Some(disk),
RUSTFS_META_TMP_BUCKET,
&tmp_object,
erasure.shard_file_size(data.content_length),
erasure.shard_file_size(data.size()),
erasure.shard_size(),
HashAlgorithm::HighwayHash256,
)
@@ -3924,7 +3928,10 @@ impl ObjectIO for SetDisks {
return Err(Error::other(format!("not enough disks to write: {:?}", errors)));
}
let stream = mem::replace(&mut data.stream, HashReader::new(Box::new(Cursor::new(Vec::new())), 0, 0, None, false)?);
let stream = mem::replace(
&mut data.stream,
HashReader::new(Box::new(WarpReader::new(Cursor::new(Vec::new()))), 0, 0, None, false)?,
);
let (reader, w_size) = match Arc::new(erasure).encode(stream, &mut writers, write_quorum).await {
Ok((r, w)) => (r, w),
@@ -3939,6 +3946,16 @@ impl ObjectIO for SetDisks {
// error!("close_bitrot_writers err {:?}", err);
// }
if (w_size as i64) < data.size() {
return Err(Error::other("put_object write size < data.size()"));
}
if user_defined.contains_key(&format!("{}compression", RESERVED_METADATA_PREFIX_LOWER)) {
user_defined.insert(format!("{}compression-size", RESERVED_METADATA_PREFIX_LOWER), w_size.to_string());
}
let index_op = data.stream.try_get_index().map(|v| v.clone().into_vec());
//TODO: userDefined
let etag = data.stream.try_resolve_etag().unwrap_or_default();
@@ -3949,6 +3966,14 @@ impl ObjectIO for SetDisks {
// get content-type
}
let mut actual_size = data.actual_size();
if actual_size < 0 {
let is_compressed = fi.is_compressed();
if !is_compressed {
actual_size = w_size as i64;
}
}
if let Some(sc) = user_defined.get(AMZ_STORAGE_CLASS) {
if sc == storageclass::STANDARD {
let _ = user_defined.remove(AMZ_STORAGE_CLASS);
@@ -3962,17 +3987,19 @@ impl ObjectIO for SetDisks {
if let Some(writer) = writers[i].take() {
fi.data = Some(writer.into_inline_data().map(bytes::Bytes::from).unwrap_or_default());
}
fi.set_inline_data();
}
fi.metadata = user_defined.clone();
fi.mod_time = Some(now);
fi.size = w_size;
fi.size = w_size as i64;
fi.versioned = opts.versioned || opts.version_suspended;
fi.add_object_part(1, etag.clone(), w_size, fi.mod_time, w_size);
fi.add_object_part(1, etag.clone(), w_size, fi.mod_time, actual_size, index_op.clone());
fi.set_inline_data();
// debug!("put_object fi {:?}", &fi)
if opts.data_movement {
fi.set_data_moved();
}
}
let (online_disks, _, op_old_dir) = Self::rename_data(
@@ -4566,7 +4593,7 @@ impl StorageAPI for SetDisks {
Some(disk),
RUSTFS_META_TMP_BUCKET,
&tmp_part_path,
erasure.shard_file_size(data.content_length),
erasure.shard_file_size(data.size()),
erasure.shard_size(),
HashAlgorithm::HighwayHash256,
)
@@ -4605,16 +4632,33 @@ impl StorageAPI for SetDisks {
return Err(Error::other(format!("not enough disks to write: {:?}", errors)));
}
let stream = mem::replace(&mut data.stream, HashReader::new(Box::new(Cursor::new(Vec::new())), 0, 0, None, false)?);
let stream = mem::replace(
&mut data.stream,
HashReader::new(Box::new(WarpReader::new(Cursor::new(Vec::new()))), 0, 0, None, false)?,
);
let (reader, w_size) = Arc::new(erasure).encode(stream, &mut writers, write_quorum).await?; // TODO: on error, delete the temporary directory
let _ = mem::replace(&mut data.stream, reader);
if (w_size as i64) < data.size() {
return Err(Error::other("put_object_part write size < data.size()"));
}
let index_op = data.stream.try_get_index().map(|v| v.clone().into_vec());
let mut etag = data.stream.try_resolve_etag().unwrap_or_default();
if let Some(ref tag) = opts.preserve_etag {
etag = tag.clone(); // TODO: verify that the etag matches
etag = tag.clone();
}
let mut actual_size = data.actual_size();
if actual_size < 0 {
let is_compressed = fi.is_compressed();
if !is_compressed {
actual_size = w_size as i64;
}
}
let part_info = ObjectPartInfo {
@@ -4622,7 +4666,8 @@ impl StorageAPI for SetDisks {
number: part_id,
size: w_size,
mod_time: Some(OffsetDateTime::now_utc()),
actual_size: data.content_length,
actual_size,
index: index_op,
..Default::default()
};
@@ -4649,6 +4694,7 @@ impl StorageAPI for SetDisks {
part_num: part_id,
last_mod: Some(OffsetDateTime::now_utc()),
size: w_size,
actual_size,
};
// error!("put_object_part ret {:?}", &ret);
@@ -4932,7 +4978,7 @@ impl StorageAPI for SetDisks {
// complete_multipart_upload completes the multipart upload
#[tracing::instrument(skip(self))]
async fn complete_multipart_upload(
&self,
self: Arc<Self>,
bucket: &str,
object: &str,
upload_id: &str,
@@ -4974,12 +5020,15 @@ impl StorageAPI for SetDisks {
for (i, res) in part_files_resp.iter().enumerate() {
let part_id = uploaded_parts[i].part_num;
if !res.error.is_empty() || !res.exists {
// error!("complete_multipart_upload part_id err {:?}", res);
error!("complete_multipart_upload part_id err {:?}, exists={}", res, res.exists);
return Err(Error::InvalidPart(part_id, bucket.to_owned(), object.to_owned()));
}
let part_fi = FileInfo::unmarshal(&res.data).map_err(|_e| {
// error!("complete_multipart_upload FileInfo::unmarshal err {:?}", e);
let part_fi = FileInfo::unmarshal(&res.data).map_err(|e| {
error!(
"complete_multipart_upload FileInfo::unmarshal err {:?}, part_id={}, bucket={}, object={}",
e, part_id, bucket, object
);
Error::InvalidPart(part_id, bucket.to_owned(), object.to_owned())
})?;
let part = &part_fi.parts[0];
@@ -4989,11 +5038,18 @@ impl StorageAPI for SetDisks {
// debug!("complete part {} object info {:?}", part_num, &part);
if part_id != part_num {
// error!("complete_multipart_upload part_id err part_id != part_num {} != {}", part_id, part_num);
error!("complete_multipart_upload part_id err part_id != part_num {} != {}", part_id, part_num);
return Err(Error::InvalidPart(part_id, bucket.to_owned(), object.to_owned()));
}
fi.add_object_part(part.number, part.etag.clone(), part.size, part.mod_time, part.actual_size);
fi.add_object_part(
part.number,
part.etag.clone(),
part.size,
part.mod_time,
part.actual_size,
part.index.clone(),
);
}
let (shuffle_disks, mut parts_metadatas) = Self::shuffle_disks_and_parts_metadata_by_index(&disks, &files_metas, &fi);
@@ -5003,24 +5059,35 @@ impl StorageAPI for SetDisks {
fi.parts = Vec::with_capacity(uploaded_parts.len());
let mut object_size: usize = 0;
let mut object_actual_size: usize = 0;
let mut object_actual_size: i64 = 0;
for (i, p) in uploaded_parts.iter().enumerate() {
let has_part = curr_fi.parts.iter().find(|v| v.number == p.part_num);
if has_part.is_none() {
// error!("complete_multipart_upload has_part.is_none() {:?}", has_part);
error!(
"complete_multipart_upload has_part.is_none() {:?}, part_id={}, bucket={}, object={}",
has_part, p.part_num, bucket, object
);
return Err(Error::InvalidPart(p.part_num, "".to_owned(), p.etag.clone().unwrap_or_default()));
}
let ext_part = &curr_fi.parts[i];
if p.etag != Some(ext_part.etag.clone()) {
error!(
"complete_multipart_upload etag err {:?}, part_id={}, bucket={}, object={}",
p.etag, p.part_num, bucket, object
);
return Err(Error::InvalidPart(p.part_num, ext_part.etag.clone(), p.etag.clone().unwrap_or_default()));
}
// TODO: crypto
if (i < uploaded_parts.len() - 1) && !is_min_allowed_part_size(ext_part.size) {
if (i < uploaded_parts.len() - 1) && !is_min_allowed_part_size(ext_part.actual_size) {
error!(
"complete_multipart_upload is_min_allowed_part_size err {:?}, part_id={}, bucket={}, object={}",
ext_part.actual_size, p.part_num, bucket, object
);
return Err(Error::InvalidPart(p.part_num, ext_part.etag.clone(), p.etag.clone().unwrap_or_default()));
}
@@ -5033,11 +5100,12 @@ impl StorageAPI for SetDisks {
size: ext_part.size,
mod_time: ext_part.mod_time,
actual_size: ext_part.actual_size,
index: ext_part.index.clone(),
..Default::default()
});
}
fi.size = object_size;
fi.size = object_size as i64;
fi.mod_time = opts.mod_time;
if fi.mod_time.is_none() {
fi.mod_time = Some(OffsetDateTime::now_utc());
@@ -5054,6 +5122,18 @@ impl StorageAPI for SetDisks {
fi.metadata.insert("etag".to_owned(), etag);
fi.metadata
.insert(format!("{}actual-size", RESERVED_METADATA_PREFIX_LOWER), object_actual_size.to_string());
if fi.is_compressed() {
fi.metadata
.insert(format!("{}compression-size", RESERVED_METADATA_PREFIX_LOWER), object_size.to_string());
}
if opts.data_movement {
fi.set_data_moved();
}
// TODO: object_actual_size
let _ = object_actual_size;
@@ -5125,17 +5205,6 @@ impl StorageAPI for SetDisks {
)
.await?;
for (i, op_disk) in online_disks.iter().enumerate() {
if let Some(disk) = op_disk {
if disk.is_online().await {
fi = parts_metadatas[i].clone();
break;
}
}
}
fi.is_latest = true;
// debug!("complete fileinfo {:?}", &fi);
// TODO: reduce_common_data_dir
@@ -5157,7 +5226,22 @@ impl StorageAPI for SetDisks {
.await;
}
let _ = self.delete_all(RUSTFS_META_MULTIPART_BUCKET, &upload_id_path).await;
let upload_id_path = upload_id_path.clone();
let store = self.clone();
let _cleanup_handle = tokio::spawn(async move {
let _ = store.delete_all(RUSTFS_META_MULTIPART_BUCKET, &upload_id_path).await;
});
for (i, op_disk) in online_disks.iter().enumerate() {
if let Some(disk) = op_disk {
if disk.is_online().await {
fi = parts_metadatas[i].clone();
break;
}
}
}
fi.is_latest = true;
Ok(ObjectInfo::from_file_info(&fi, bucket, object, opts.versioned || opts.version_suspended))
}
@@ -5517,7 +5601,7 @@ async fn disks_with_all_parts(
let verify_err = bitrot_verify(
Box::new(Cursor::new(data.clone())),
data_len,
meta.erasure.shard_file_size(meta.size),
meta.erasure.shard_file_size(meta.size) as usize,
checksum_info.algorithm,
checksum_info.hash,
meta.erasure.shard_size(),
@@ -5729,8 +5813,8 @@ pub async fn stat_all_dirs(disks: &[Option<DiskStore>], bucket: &str, prefix: &s
}
const GLOBAL_MIN_PART_SIZE: ByteSize = ByteSize::mib(5);
fn is_min_allowed_part_size(size: usize) -> bool {
size as u64 >= GLOBAL_MIN_PART_SIZE.as_u64()
fn is_min_allowed_part_size(size: i64) -> bool {
size >= GLOBAL_MIN_PART_SIZE.as_u64() as i64
}
fn get_complete_multipart_md5(parts: &[CompletePart]) -> String {

View File

@@ -627,7 +627,7 @@ impl StorageAPI for Sets {
#[tracing::instrument(skip(self))]
async fn complete_multipart_upload(
&self,
self: Arc<Self>,
bucket: &str,
object: &str,
upload_id: &str,

View File

@@ -1233,7 +1233,7 @@ impl ObjectIO for ECStore {
return self.pools[0].put_object(bucket, object.as_str(), data, opts).await;
}
let idx = self.get_pool_idx(bucket, &object, data.content_length as i64).await?;
let idx = self.get_pool_idx(bucket, &object, data.size()).await?;
if opts.data_movement && idx == opts.src_pool_idx {
return Err(StorageError::DataMovementOverwriteErr(
@@ -1508,9 +1508,7 @@ impl StorageAPI for ECStore {
// TODO: nslock
let pool_idx = self
.get_pool_idx_no_lock(src_bucket, &src_object, src_info.size as i64)
.await?;
let pool_idx = self.get_pool_idx_no_lock(src_bucket, &src_object, src_info.size).await?;
if cp_src_dst_same {
if let (Some(src_vid), Some(dst_vid)) = (&src_opts.version_id, &dst_opts.version_id) {
@@ -1995,7 +1993,7 @@ impl StorageAPI for ECStore {
#[tracing::instrument(skip(self))]
async fn complete_multipart_upload(
&self,
self: Arc<Self>,
bucket: &str,
object: &str,
upload_id: &str,
@@ -2006,6 +2004,7 @@ impl StorageAPI for ECStore {
if self.single_pool() {
return self.pools[0]
.clone()
.complete_multipart_upload(bucket, object, upload_id, uploaded_parts, opts)
.await;
}
@@ -2015,6 +2014,7 @@ impl StorageAPI for ECStore {
continue;
}
let pool = pool.clone();
let err = match pool
.complete_multipart_upload(bucket, object, upload_id, uploaded_parts.clone(), opts)
.await

View File

@@ -7,24 +7,24 @@ use crate::store_utils::clean_metadata;
use crate::{disk::DiskStore, heal::heal_commands::HealOpts};
use http::{HeaderMap, HeaderValue};
use madmin::heal_commands::HealResultItem;
use rustfs_filemeta::headers::RESERVED_METADATA_PREFIX_LOWER;
use rustfs_filemeta::{FileInfo, MetaCacheEntriesSorted, ObjectPartInfo, headers::AMZ_OBJECT_TAGGING};
use rustfs_rio::{HashReader, Reader};
use rustfs_rio::{DecompressReader, HashReader, LimitReader, WarpReader};
use rustfs_utils::CompressionAlgorithm;
use rustfs_utils::path::decode_dir_object;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt::Debug;
use std::io::Cursor;
use std::str::FromStr as _;
use std::sync::Arc;
use time::OffsetDateTime;
use tokio::io::AsyncReadExt;
use tokio::io::{AsyncRead, AsyncReadExt};
use tracing::warn;
use uuid::Uuid;
pub const ERASURE_ALGORITHM: &str = "rs-vandermonde";
pub const BLOCK_SIZE_V2: usize = 1024 * 1024; // 1M
pub const RESERVED_METADATA_PREFIX: &str = "X-Rustfs-Internal-";
pub const RESERVED_METADATA_PREFIX_LOWER: &str = "x-rustfs-internal-";
pub const RUSTFS_HEALING: &str = "X-Rustfs-Internal-healing";
pub const RUSTFS_DATA_MOVE: &str = "X-Rustfs-Internal-data-mov";
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct MakeBucketOptions {
@@ -53,46 +53,50 @@ pub struct DeleteBucketOptions {
pub struct PutObjReader {
pub stream: HashReader,
pub content_length: usize,
}
impl Debug for PutObjReader {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("PutObjReader")
.field("content_length", &self.content_length)
.finish()
f.debug_struct("PutObjReader").finish()
}
}
impl PutObjReader {
pub fn new(stream: HashReader, content_length: usize) -> Self {
PutObjReader { stream, content_length }
pub fn new(stream: HashReader) -> Self {
PutObjReader { stream }
}
pub fn from_vec(data: Vec<u8>) -> Self {
let content_length = data.len();
let content_length = data.len() as i64;
PutObjReader {
stream: HashReader::new(Box::new(Cursor::new(data)), content_length as i64, content_length as i64, None, false)
stream: HashReader::new(Box::new(WarpReader::new(Cursor::new(data))), content_length, content_length, None, false)
.unwrap(),
content_length,
}
}
pub fn size(&self) -> i64 {
self.stream.size()
}
pub fn actual_size(&self) -> i64 {
self.stream.actual_size()
}
}
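PutObjReader no longer caches a content_length; both sizes are delegated to the wrapped HashReader, where size() is what the storage layer will see (possibly -1 for compressed streams) and actual_size() is the client-visible length. A condensed sketch of building one from an in-memory buffer, using the constructor shape shown in from_vec above — it relies on the rustfs_rio types imported in this file, so it is a sketch rather than a standalone program:

use std::io::Cursor;

fn small_put_reader(data: Vec<u8>) -> PutObjReader {
    let len = data.len() as i64;
    let hr = HashReader::new(
        Box::new(WarpReader::new(Cursor::new(data))),
        len,   // size handed to the storage layer (-1 when unknown)
        len,   // actual, client-visible size
        None,  // no expected checksum in this sketch
        false,
    )
    .expect("in-memory reader");
    let reader = PutObjReader::new(hr);
    // Equal only when no compression wrapper sits between the two sizes.
    debug_assert_eq!(reader.size(), reader.actual_size());
    reader
}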
pub struct GetObjectReader {
pub stream: Box<dyn Reader>,
pub stream: Box<dyn AsyncRead + Unpin + Send + Sync>,
pub object_info: ObjectInfo,
}
impl GetObjectReader {
#[tracing::instrument(level = "debug", skip(reader))]
pub fn new(
reader: Box<dyn Reader>,
reader: Box<dyn AsyncRead + Unpin + Send + Sync>,
rs: Option<HTTPRangeSpec>,
oi: &ObjectInfo,
opts: &ObjectOptions,
_h: &HeaderMap<HeaderValue>,
) -> Result<(Self, usize, usize)> {
) -> Result<(Self, usize, i64)> {
let mut rs = rs;
if let Some(part_number) = opts.part_number {
@@ -101,6 +105,47 @@ impl GetObjectReader {
}
}
// TODO:Encrypted
let (algo, is_compressed) = oi.is_compressed_ok()?;
// TODO: check TRANSITION
if is_compressed {
let actual_size = oi.get_actual_size()?;
let (off, length) = (0, oi.size);
let (_dec_off, dec_length) = (0, actual_size);
if let Some(_rs) = rs {
// TODO: range spec is not supported for compressed object
return Err(Error::other("The requested range is not satisfiable"));
// let (off, length) = rs.get_offset_length(actual_size)?;
}
let dec_reader = DecompressReader::new(reader, algo);
let actual_size = if actual_size > 0 {
actual_size as usize
} else {
return Err(Error::other(format!("invalid decompressed size {}", actual_size)));
};
warn!("actual_size: {}", actual_size);
let dec_reader = LimitReader::new(dec_reader, actual_size);
let mut oi = oi.clone();
oi.size = dec_length;
warn!("oi.size: {}, off: {}, length: {}", oi.size, off, length);
return Ok((
GetObjectReader {
stream: Box::new(dec_reader),
object_info: oi,
},
off,
length,
));
}
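Condensed, the compressed-object read path above is a three-stage chain: the stored stream is decompressed with the algorithm recorded in metadata, then capped at the recorded uncompressed length; range requests on compressed objects are rejected for now (the TODO above). A sketch of the composition, reusing the rustfs_rio wrappers imported in this file, so again a sketch rather than standalone code:

// stored bytes -> decompress -> cap at the recorded uncompressed size
fn decompressed_stream(
    stored: Box<dyn AsyncRead + Unpin + Send + Sync>,
    algo: CompressionAlgorithm, // parsed from x-rustfs-internal-compression
    actual_size: usize,         // from x-rustfs-internal-actual-size (or part sums)
) -> Box<dyn AsyncRead + Unpin + Send + Sync> {
    let dec = DecompressReader::new(stored, algo); // reverses CompressReader on the write path
    Box::new(LimitReader::new(dec, actual_size))   // guards against over-reading the decoder
}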
if let Some(rs) = rs {
let (off, length) = rs.get_offset_length(oi.size)?;
@@ -142,8 +187,8 @@ impl GetObjectReader {
#[derive(Debug)]
pub struct HTTPRangeSpec {
pub is_suffix_length: bool,
pub start: usize,
pub end: Option<usize>,
pub start: i64,
pub end: i64,
}
impl HTTPRangeSpec {
@@ -152,29 +197,38 @@ impl HTTPRangeSpec {
return None;
}
let mut start = 0;
let mut end = -1;
let mut start = 0i64;
let mut end = -1i64;
for i in 0..oi.parts.len().min(part_number) {
start = end + 1;
end = start + oi.parts[i].size as i64 - 1
end = start + (oi.parts[i].size as i64) - 1
}
Some(HTTPRangeSpec {
is_suffix_length: false,
start: start as usize,
end: { if end < 0 { None } else { Some(end as usize) } },
start,
end,
})
}
pub fn get_offset_length(&self, res_size: usize) -> Result<(usize, usize)> {
pub fn get_offset_length(&self, res_size: i64) -> Result<(usize, i64)> {
let len = self.get_length(res_size)?;
let mut start = self.start;
if self.is_suffix_length {
start = res_size - self.start
start = res_size - self.start;
if start < 0 {
start = 0;
}
}
Ok((start, len))
Ok((start as usize, len))
}
pub fn get_length(&self, res_size: usize) -> Result<usize> {
pub fn get_length(&self, res_size: i64) -> Result<i64> {
if res_size < 0 {
return Err(Error::other("The requested range is not satisfiable"));
}
if self.is_suffix_length {
let specified_len = self.start; // assuming h.start is an i64
let mut range_length = specified_len;
@@ -190,8 +244,8 @@ impl HTTPRangeSpec {
return Err(Error::other("The requested range is not satisfiable"));
}
if let Some(end) = self.end {
let mut end = end;
if self.end > -1 {
let mut end = self.end;
if res_size <= end {
end = res_size - 1;
}
@@ -200,7 +254,7 @@ impl HTTPRangeSpec {
return Ok(range_length);
}
if self.end.is_none() {
if self.end == -1 {
let range_length = res_size - self.start;
return Ok(range_length);
}
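A few worked values make the new i64 conventions easier to follow: end == -1 means "no explicit end", and for suffix ranges start carries the requested length as a positive number, which is how the S3 handler below constructs it. Expected results for a 100-byte object, assuming the usual inclusive-range arithmetic in the elided lines:

let size: i64 = 100;

// bytes=10-19  ->  offset 10, length 10
let r = HTTPRangeSpec { is_suffix_length: false, start: 10, end: 19 };
assert_eq!(r.get_offset_length(size).unwrap(), (10, 10));

// bytes=10-    ->  offset 10, read to EOF (end == -1)
let r = HTTPRangeSpec { is_suffix_length: false, start: 10, end: -1 };
assert_eq!(r.get_offset_length(size).unwrap(), (10, 90));

// bytes=-20    ->  last 20 bytes: offset 80, length 20
let r = HTTPRangeSpec { is_suffix_length: true, start: 20, end: -1 };
assert_eq!(r.get_offset_length(size).unwrap(), (80, 20));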
@@ -276,6 +330,7 @@ pub struct PartInfo {
pub last_mod: Option<OffsetDateTime>,
pub size: usize,
pub etag: Option<String>,
pub actual_size: i64,
}
#[derive(Debug, Clone, Default)]
@@ -298,9 +353,9 @@ pub struct ObjectInfo {
pub bucket: String,
pub name: String,
pub mod_time: Option<OffsetDateTime>,
pub size: usize,
pub size: i64,
// Actual size is the real size of the object uploaded by client.
pub actual_size: Option<usize>,
pub actual_size: i64,
pub is_dir: bool,
pub user_defined: Option<HashMap<String, String>>,
pub parity_blocks: usize,
@@ -364,27 +419,41 @@ impl Clone for ObjectInfo {
impl ObjectInfo {
pub fn is_compressed(&self) -> bool {
if let Some(meta) = &self.user_defined {
meta.contains_key(&format!("{}compression", RESERVED_METADATA_PREFIX))
meta.contains_key(&format!("{}compression", RESERVED_METADATA_PREFIX_LOWER))
} else {
false
}
}
pub fn is_compressed_ok(&self) -> Result<(CompressionAlgorithm, bool)> {
let scheme = self
.user_defined
.as_ref()
.and_then(|meta| meta.get(&format!("{}compression", RESERVED_METADATA_PREFIX_LOWER)).cloned());
if let Some(scheme) = scheme {
let algorithm = CompressionAlgorithm::from_str(&scheme)?;
Ok((algorithm, true))
} else {
Ok((CompressionAlgorithm::None, false))
}
}
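The compression marker is ordinary object metadata, so whether an object is compressed can be decided entirely from two reserved keys written by the put paths later in this diff (RESERVED_METADATA_PREFIX_LOWER is "x-rustfs-internal-"). A small sketch of the key layout:

use std::collections::HashMap;

fn compression_metadata(actual_size: i64) -> HashMap<String, String> {
    let mut m = HashMap::new();
    // written when the object qualifies for compression on upload
    m.insert(
        "x-rustfs-internal-compression".to_string(),
        CompressionAlgorithm::default().to_string(),
    );
    // the client-visible size; ObjectInfo.size now holds the stored (compressed) length
    m.insert("x-rustfs-internal-actual-size".to_string(), actual_size.to_string());
    m
}

is_compressed_ok() parses the first key back into a CompressionAlgorithm; get_actual_size() below falls back to the second key, or to summing the parts' actual_size for multipart objects.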
pub fn is_multipart(&self) -> bool {
self.etag.as_ref().is_some_and(|v| v.len() != 32)
}
pub fn get_actual_size(&self) -> std::io::Result<usize> {
if let Some(actual_size) = self.actual_size {
return Ok(actual_size);
pub fn get_actual_size(&self) -> std::io::Result<i64> {
if self.actual_size > 0 {
return Ok(self.actual_size);
}
if self.is_compressed() {
if let Some(meta) = &self.user_defined {
if let Some(size_str) = meta.get(&format!("{}actual-size", RESERVED_METADATA_PREFIX)) {
if let Some(size_str) = meta.get(&format!("{}actual-size", RESERVED_METADATA_PREFIX_LOWER)) {
if !size_str.is_empty() {
// Todo: deal with error
let size = size_str.parse::<usize>().map_err(|e| std::io::Error::other(e.to_string()))?;
let size = size_str.parse::<i64>().map_err(|e| std::io::Error::other(e.to_string()))?;
return Ok(size);
}
}
@@ -395,8 +464,9 @@ impl ObjectInfo {
actual_size += part.actual_size;
});
if actual_size == 0 && actual_size != self.size {
return Err(std::io::Error::other("invalid decompressed size"));
return Err(std::io::Error::other(format!("invalid decompressed size {} {}", actual_size, self.size)));
}
return Ok(actual_size);
}
@@ -803,7 +873,7 @@ pub trait StorageAPI: ObjectIO {
// ListObjectParts
async fn abort_multipart_upload(&self, bucket: &str, object: &str, upload_id: &str, opts: &ObjectOptions) -> Result<()>;
async fn complete_multipart_upload(
&self,
self: Arc<Self>,
bucket: &str,
object: &str,
upload_id: &str,

View File

@@ -164,7 +164,7 @@ pub struct PutFileQuery {
volume: String,
path: String,
append: bool,
size: usize,
size: i64,
}
pub struct PutFile {}
#[async_trait::async_trait]

View File

@@ -29,10 +29,15 @@ use ecstore::bucket::metadata_sys;
use ecstore::bucket::policy_sys::PolicySys;
use ecstore::bucket::tagging::decode_tags;
use ecstore::bucket::tagging::encode_tags;
use ecstore::bucket::utils::serialize;
use ecstore::bucket::versioning_sys::BucketVersioningSys;
use ecstore::cmd::bucket_replication::ReplicationStatusType;
use ecstore::cmd::bucket_replication::ReplicationType;
use ecstore::cmd::bucket_replication::get_must_replicate_options;
use ecstore::cmd::bucket_replication::must_replicate;
use ecstore::cmd::bucket_replication::schedule_replication;
use ecstore::compress::MIN_COMPRESSIBLE_SIZE;
use ecstore::compress::is_compressible;
use ecstore::error::StorageError;
use ecstore::new_object_layer_fn;
use ecstore::set_disk::DEFAULT_READ_BUFFER_SIZE;
@@ -46,12 +51,7 @@ use ecstore::store_api::ObjectIO;
use ecstore::store_api::ObjectOptions;
use ecstore::store_api::ObjectToDelete;
use ecstore::store_api::PutObjReader;
use ecstore::store_api::StorageAPI;
// use ecstore::store_api::RESERVED_METADATA_PREFIX;
use ecstore::bucket::utils::serialize;
use ecstore::cmd::bucket_replication::ReplicationStatusType;
use ecstore::cmd::bucket_replication::ReplicationType;
use ecstore::store_api::RESERVED_METADATA_PREFIX_LOWER;
use ecstore::store_api::StorageAPI; // use ecstore::store_api::RESERVED_METADATA_PREFIX;
use futures::pin_mut;
use futures::{Stream, StreamExt};
use http::HeaderMap;
@@ -63,8 +63,13 @@ use policy::policy::Validator;
use policy::policy::action::Action;
use policy::policy::action::S3Action;
use query::instance::make_rustfsms;
use rustfs_filemeta::headers::RESERVED_METADATA_PREFIX_LOWER;
use rustfs_filemeta::headers::{AMZ_DECODED_CONTENT_LENGTH, AMZ_OBJECT_TAGGING};
use rustfs_rio::CompressReader;
use rustfs_rio::HashReader;
use rustfs_rio::Reader;
use rustfs_rio::WarpReader;
use rustfs_utils::CompressionAlgorithm;
use rustfs_utils::path::path_join_buf;
use rustfs_zip::CompressionFormat;
use s3s::S3;
@@ -86,7 +91,6 @@ use tokio_stream::wrappers::ReceiverStream;
use tokio_tar::Archive;
use tokio_util::io::ReaderStream;
use tokio_util::io::StreamReader;
use tracing::debug;
use tracing::error;
use tracing::info;
use tracing::warn;
@@ -179,14 +183,31 @@ impl FS {
fpath = format!("{}/{}", prefix, fpath);
}
let size = f.header().size().unwrap_or_default() as usize;
let mut size = f.header().size().unwrap_or_default() as i64;
println!("Extracted: {}, size {}", fpath, size);
// Wrap the tar entry with BufReader to make it compatible with Reader trait
let reader = Box::new(tokio::io::BufReader::new(f));
let hrd = HashReader::new(reader, size as i64, size as i64, None, false).map_err(ApiError::from)?;
let mut reader = PutObjReader::new(hrd, size);
let mut reader: Box<dyn Reader> = Box::new(WarpReader::new(f));
let mut metadata = HashMap::new();
let actual_size = size;
if is_compressible(&HeaderMap::new(), &fpath) && size > MIN_COMPRESSIBLE_SIZE as i64 {
metadata.insert(
format!("{}compression", RESERVED_METADATA_PREFIX_LOWER),
CompressionAlgorithm::default().to_string(),
);
metadata.insert(format!("{}actual-size", RESERVED_METADATA_PREFIX_LOWER,), size.to_string());
let hrd = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;
reader = Box::new(CompressReader::new(hrd, CompressionAlgorithm::default()));
size = -1;
}
let hrd = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;
let mut reader = PutObjReader::new(hrd);
let _obj_info = store
.put_object(&bucket, &fpath, &mut reader, &ObjectOptions::default())
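Whether an extracted tar entry gets compressed uses the same gate as put_object further down: the ecstore::compress helpers imported above, which take the request headers and the object key plus a minimum size threshold. Condensed (entries pulled out of the tarball carry no request headers, hence the empty HeaderMap):

fn should_compress_entry(fpath: &str, size: i64) -> bool {
    // is_compressible and MIN_COMPRESSIBLE_SIZE come from ecstore::compress,
    // as imported at the top of this file.
    is_compressible(&HeaderMap::new(), fpath) && size > MIN_COMPRESSIBLE_SIZE as i64
}

When it returns true, the entry is wrapped exactly like the regular upload path: the compression and actual-size metadata keys are recorded, the stream goes through CompressReader, and the size passed on becomes -1.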
@@ -319,13 +340,10 @@ impl S3 for FS {
src_info.metadata_only = true;
}
let hrd = HashReader::new(gr.stream, gr.object_info.size as i64, gr.object_info.size as i64, None, false)
.map_err(ApiError::from)?;
let reader = Box::new(WarpReader::new(gr.stream));
let hrd = HashReader::new(reader, gr.object_info.size, gr.object_info.size, None, false).map_err(ApiError::from)?;
src_info.put_object_reader = Some(PutObjReader {
stream: hrd,
content_length: gr.object_info.size as usize,
});
src_info.put_object_reader = Some(PutObjReader::new(hrd));
// check quota
// TODO: src metadada
@@ -536,13 +554,13 @@ impl S3 for FS {
let rs = range.map(|v| match v {
Range::Int { first, last } => HTTPRangeSpec {
is_suffix_length: false,
start: first as usize,
end: last.map(|v| v as usize),
start: first as i64,
end: if let Some(last) = last { last as i64 } else { -1 },
},
Range::Suffix { length } => HTTPRangeSpec {
is_suffix_length: true,
start: length as usize,
end: None,
start: length as i64,
end: -1,
},
});
@@ -583,7 +601,7 @@ impl S3 for FS {
let body = Some(StreamingBlob::wrap(bytes_stream(
ReaderStream::with_capacity(reader.stream, DEFAULT_READ_BUFFER_SIZE),
info.size,
info.size as usize,
)));
let output = GetObjectOutput {
@@ -637,13 +655,13 @@ impl S3 for FS {
let rs = range.map(|v| match v {
Range::Int { first, last } => HTTPRangeSpec {
is_suffix_length: false,
start: first as usize,
end: last.map(|v| v as usize),
start: first as i64,
end: if let Some(last) = last { last as i64 } else { -1 },
},
Range::Suffix { length } => HTTPRangeSpec {
is_suffix_length: true,
start: length as usize,
end: None,
start: length as i64,
end: -1,
},
});
@@ -664,8 +682,8 @@ impl S3 for FS {
// warn!("head_object info {:?}", &info);
let content_type = {
if let Some(content_type) = info.content_type {
match ContentType::from_str(&content_type) {
if let Some(content_type) = &info.content_type {
match ContentType::from_str(content_type) {
Ok(res) => Some(res),
Err(err) => {
error!("parse content-type err {} {:?}", &content_type, err);
@@ -679,10 +697,14 @@ impl S3 for FS {
};
let last_modified = info.mod_time.map(Timestamp::from);
// TODO: range download
let content_length = info.get_actual_size().map_err(ApiError::from)?;
let metadata = info.user_defined;
let output = HeadObjectOutput {
content_length: Some(try_!(i64::try_from(info.size))),
content_length: Some(content_length),
content_type,
last_modified,
e_tag: info.etag,
@@ -806,7 +828,7 @@ impl S3 for FS {
let mut obj = Object {
key: Some(v.name.to_owned()),
last_modified: v.mod_time.map(Timestamp::from),
size: Some(v.size as i64),
size: Some(v.size),
e_tag: v.etag.clone(),
..Default::default()
};
@@ -885,7 +907,7 @@ impl S3 for FS {
ObjectVersion {
key: Some(v.name.to_owned()),
last_modified: v.mod_time.map(Timestamp::from),
size: Some(v.size as i64),
size: Some(v.size),
version_id: v.version_id.map(|v| v.to_string()),
is_latest: Some(v.is_latest),
e_tag: v.etag.clone(),
@@ -926,7 +948,6 @@ impl S3 for FS {
return self.put_object_extract(req).await;
}
info!("put object");
let input = req.input;
if let Some(ref storage_class) = input.storage_class {
@@ -949,7 +970,7 @@ impl S3 for FS {
let Some(body) = body else { return Err(s3_error!(IncompleteBody)) };
let content_length = match content_length {
let mut size = match content_length {
Some(c) => c,
None => {
if let Some(val) = req.headers.get(AMZ_DECODED_CONTENT_LENGTH) {
@@ -964,9 +985,6 @@ impl S3 for FS {
};
let body = StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string()))));
let body = Box::new(tokio::io::BufReader::new(body));
let hrd = HashReader::new(body, content_length as i64, content_length as i64, None, false).map_err(ApiError::from)?;
let mut reader = PutObjReader::new(hrd, content_length as usize);
// let body = Box::new(StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string())))));
@@ -984,10 +1002,32 @@ impl S3 for FS {
metadata.insert(AMZ_OBJECT_TAGGING.to_owned(), tags);
}
let mut reader: Box<dyn Reader> = Box::new(WarpReader::new(body));
let actual_size = size;
if is_compressible(&req.headers, &key) && size > MIN_COMPRESSIBLE_SIZE as i64 {
metadata.insert(
format!("{}compression", RESERVED_METADATA_PREFIX_LOWER),
CompressionAlgorithm::default().to_string(),
);
metadata.insert(format!("{}actual-size", RESERVED_METADATA_PREFIX_LOWER,), size.to_string());
let hrd = HashReader::new(reader, size as i64, size as i64, None, false).map_err(ApiError::from)?;
reader = Box::new(CompressReader::new(hrd, CompressionAlgorithm::default()));
size = -1;
}
// TODO: md5 check
let reader = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;
let mut reader = PutObjReader::new(reader);
let mt = metadata.clone();
let mt2 = metadata.clone();
let opts: ObjectOptions = put_opts(&bucket, &key, version_id, &req.headers, Some(mt))
let mut opts: ObjectOptions = put_opts(&bucket, &key, version_id, &req.headers, Some(mt))
.await
.map_err(ApiError::from)?;
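Put together, the compressed write path above is: gate on is_compressible and the size threshold, record the two reserved metadata keys, wrap the plaintext in a HashReader and then a CompressReader, and hand the storage layer an unknown (-1) size while actual_size rides along for accounting. A condensed restatement using only the calls visible in this handler:

let mut reader: Box<dyn Reader> = Box::new(WarpReader::new(body));
let actual_size = size;

if is_compressible(&req.headers, &key) && size > MIN_COMPRESSIBLE_SIZE as i64 {
    // mark the object and remember the client-visible size
    metadata.insert(
        format!("{}compression", RESERVED_METADATA_PREFIX_LOWER),
        CompressionAlgorithm::default().to_string(),
    );
    metadata.insert(format!("{}actual-size", RESERVED_METADATA_PREFIX_LOWER), size.to_string());

    // hash the plaintext, then compress it on the fly
    let plain = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;
    reader = Box::new(CompressReader::new(plain, CompressionAlgorithm::default()));
    size = -1; // compressed length is unknown until the stream is drained
}

// the outer HashReader sees the (possibly compressed) bytes that reach the disks
let reader = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;
let mut reader = PutObjReader::new(reader);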
@@ -995,18 +1035,18 @@ impl S3 for FS {
get_must_replicate_options(&mt2, "", ReplicationStatusType::Unknown, ReplicationType::ObjectReplicationType, &opts);
let dsc = must_replicate(&bucket, &key, &repoptions).await;
warn!("dsc {}", &dsc.replicate_any().clone());
// warn!("dsc {}", &dsc.replicate_any().clone());
if dsc.replicate_any() {
let k = format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-timestamp");
let now: DateTime<Utc> = Utc::now();
let formatted_time = now.to_rfc3339();
metadata.insert(k, formatted_time);
let k = format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-status");
metadata.insert(k, dsc.pending_status());
if let Some(metadata) = opts.user_defined.as_mut() {
let k = format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-timestamp");
let now: DateTime<Utc> = Utc::now();
let formatted_time = now.to_rfc3339();
metadata.insert(k, formatted_time);
let k = format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-status");
metadata.insert(k, dsc.pending_status());
}
}
debug!("put_object opts {:?}", &opts);
let obj_info = store
.put_object(&bucket, &key, &mut reader, &opts)
.await
@@ -1058,6 +1098,13 @@ impl S3 for FS {
metadata.insert(AMZ_OBJECT_TAGGING.to_owned(), tags);
}
if is_compressible(&req.headers, &key) {
metadata.insert(
format!("{}compression", RESERVED_METADATA_PREFIX_LOWER),
CompressionAlgorithm::default().to_string(),
);
}
let opts: ObjectOptions = put_opts(&bucket, &key, version_id, &req.headers, Some(metadata))
.await
.map_err(ApiError::from)?;
@@ -1095,7 +1142,7 @@ impl S3 for FS {
// let upload_id =
let body = body.ok_or_else(|| s3_error!(IncompleteBody))?;
let content_length = match content_length {
let mut size = match content_length {
Some(c) => c,
None => {
if let Some(val) = req.headers.get(AMZ_DECODED_CONTENT_LENGTH) {
@@ -1110,21 +1157,42 @@ impl S3 for FS {
};
let body = StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string()))));
let body = Box::new(tokio::io::BufReader::new(body));
let hrd = HashReader::new(body, content_length as i64, content_length as i64, None, false).map_err(ApiError::from)?;
// mc cp step 4
let mut data = PutObjReader::new(hrd, content_length as usize);
let opts = ObjectOptions::default();
let Some(store) = new_object_layer_fn() else {
return Err(S3Error::with_message(S3ErrorCode::InternalError, "Not init".to_string()));
};
// TODO: hash_reader
let fi = store
.get_multipart_info(&bucket, &key, &upload_id, &opts)
.await
.map_err(ApiError::from)?;
let is_compressible = fi
.user_defined
.contains_key(format!("{}compression", RESERVED_METADATA_PREFIX_LOWER).as_str());
let mut reader: Box<dyn Reader> = Box::new(WarpReader::new(body));
let actual_size = size;
if is_compressible {
let hrd = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;
reader = Box::new(CompressReader::new(hrd, CompressionAlgorithm::default()));
size = -1;
}
// TODO: md5 check
let reader = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;
let mut reader = PutObjReader::new(reader);
let info = store
.put_object_part(&bucket, &key, &upload_id, part_id, &mut data, &opts)
.put_object_part(&bucket, &key, &upload_id, part_id, &mut reader, &opts)
.await
.map_err(ApiError::from)?;
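For multipart uploads the per-part handler cannot re-evaluate the request headers; instead it asks get_multipart_info for the metadata recorded at create_multipart_upload time (see the hunk above that stores the compression key) and, if the upload was marked, wraps each part exactly like put_object does. Roughly:

let fi = store
    .get_multipart_info(&bucket, &key, &upload_id, &opts)
    .await
    .map_err(ApiError::from)?;

let upload_is_compressed = fi
    .user_defined
    .contains_key(format!("{}compression", RESERVED_METADATA_PREFIX_LOWER).as_str());

if upload_is_compressed {
    // same wrapping as put_object: hash the plaintext, compress, size becomes -1
    let plain = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;
    reader = Box::new(CompressReader::new(plain, CompressionAlgorithm::default()));
    size = -1;
}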

View File

@@ -108,7 +108,7 @@ impl ObjectStore for EcObjectStore {
let meta = ObjectMeta {
location: location.clone(),
last_modified: Utc::now(),
size: reader.object_info.size,
size: reader.object_info.size as usize,
e_tag: reader.object_info.etag,
version: None,
};
@@ -121,7 +121,7 @@ impl ObjectStore for EcObjectStore {
ConvertStream::new(reader.stream, self.delimiter.clone()),
DEFAULT_READ_BUFFER_SIZE,
),
reader.object_info.size,
reader.object_info.size as usize,
)
.boxed(),
)
@@ -129,7 +129,7 @@ impl ObjectStore for EcObjectStore {
object_store::GetResultPayload::Stream(
bytes_stream(
ReaderStream::with_capacity(reader.stream, DEFAULT_READ_BUFFER_SIZE),
reader.object_info.size,
reader.object_info.size as usize,
)
.boxed(),
)
@@ -137,7 +137,7 @@ impl ObjectStore for EcObjectStore {
Ok(GetResult {
payload,
meta,
range: 0..reader.object_info.size,
range: 0..reader.object_info.size as usize,
attributes,
})
}
@@ -161,7 +161,7 @@ impl ObjectStore for EcObjectStore {
Ok(ObjectMeta {
location: location.clone(),
last_modified: Utc::now(),
size: info.size,
size: info.size as usize,
e_tag: info.etag,
version: None,
})

View File

@@ -8,4 +8,5 @@ RUSTFS_CONSOLE_ADDRESS=":7001"
RUST_LOG=warn
RUSTFS_OBS_LOG_DIRECTORY="/var/logs/rustfs/"
RUSTFS_NS_SCANNER_INTERVAL=60
RUSTFS_SKIP_BACKGROUND_TASK=true
#RUSTFS_SKIP_BACKGROUND_TASK=true
RUSTFS_COMPRESSION_ENABLED=true
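The config plumbing that consumes RUSTFS_COMPRESSION_ENABLED is not part of this diff, so the reader below is only an illustration (the accepted truthy spellings are an assumption); the test leans on the temp-env dev-dependency this PR adds to Cargo.lock:

fn compression_enabled() -> bool {
    std::env::var("RUSTFS_COMPRESSION_ENABLED")
        .map(|v| matches!(v.to_ascii_lowercase().as_str(), "1" | "true" | "on" | "yes"))
        .unwrap_or(false)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn env_toggle() {
        temp_env::with_var("RUSTFS_COMPRESSION_ENABLED", Some("true"), || {
            assert!(compression_enabled());
        });
        temp_env::with_var("RUSTFS_COMPRESSION_ENABLED", None::<&str>, || {
            assert!(!compression_enabled());
        });
    }
}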

View File

@@ -19,7 +19,7 @@ mkdir -p ./target/volume/test{0..4}
if [ -z "$RUST_LOG" ]; then
export RUST_BACKTRACE=1
export RUST_LOG="rustfs=debug,ecstore=debug,s3s=debug,iam=debug"
fi
# export RUSTFS_ERASURE_SET_DRIVE_COUNT=5
@@ -72,6 +72,11 @@ export OTEL_INSTRUMENTATION_VERSION="0.1.1"
export OTEL_INSTRUMENTATION_SCHEMA_URL="https://opentelemetry.io/schemas/1.31.0"
export OTEL_INSTRUMENTATION_ATTRIBUTES="env=production"
export RUSTFS_NS_SCANNER_INTERVAL=60 # object scanner interval, in seconds
# export RUSTFS_SKIP_BACKGROUND_TASK=true
export RUSTFS_COMPRESSION_ENABLED=true # enable object compression
# 事件消息配置
#export RUSTFS_EVENT_CONFIG="./deploy/config/event.example.toml"