Mirror of https://github.com/rustfs/rustfs.git (synced 2026-01-17 09:40:32 +00:00)
Merge pull request #480 from rustfs/feat/compress
feat: add object compression support
24  Cargo.lock (generated)
@@ -3666,6 +3666,7 @@ dependencies = [
"shadow-rs",
"siphasher 1.0.1",
"smallvec",
"temp-env",
"tempfile",
"thiserror 2.0.12",
"time",
@@ -8435,25 +8436,23 @@ dependencies = [
|
||||
"aes-gcm",
|
||||
"async-trait",
|
||||
"base64-simd",
|
||||
"brotli 8.0.1",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"crc32fast",
|
||||
"criterion",
|
||||
"flate2",
|
||||
"futures",
|
||||
"hex-simd",
|
||||
"http 1.3.1",
|
||||
"lz4",
|
||||
"md-5",
|
||||
"pin-project-lite",
|
||||
"rand 0.9.1",
|
||||
"reqwest",
|
||||
"rustfs-utils",
|
||||
"snap",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"tokio-test",
|
||||
"tokio-util",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -8489,14 +8488,18 @@ version = "0.0.1"
|
||||
dependencies = [
|
||||
"base64-simd",
|
||||
"blake3",
|
||||
"brotli 8.0.1",
|
||||
"crc32fast",
|
||||
"flate2",
|
||||
"hex-simd",
|
||||
"highway",
|
||||
"lazy_static",
|
||||
"local-ip-address",
|
||||
"lz4",
|
||||
"md-5",
|
||||
"netif",
|
||||
"nix 0.30.1",
|
||||
"rand 0.9.1",
|
||||
"regex",
|
||||
"rustfs-config",
|
||||
"rustls 0.23.27",
|
||||
@@ -8505,11 +8508,13 @@ dependencies = [
|
||||
"serde",
|
||||
"sha2 0.10.9",
|
||||
"siphasher 1.0.1",
|
||||
"snap",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"url",
|
||||
"winapi",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -9739,6 +9744,15 @@ version = "0.12.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
|
||||
|
||||
[[package]]
|
||||
name = "temp-env"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "45107136c2ddf8c4b87453c02294fd0adf41751796e81e8ba3f7fd951977ab57"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.20.0"
|
||||
|
||||
@@ -157,6 +157,11 @@ prost = "0.13.5"
prost-build = "0.13.5"
protobuf = "3.7"
rand = "0.9.1"
brotli = "8.0.1"
flate2 = "1.1.1"
zstd = "0.13.3"
lz4 = "1.28.1"
snap = "1.1.1"
rdkafka = { version = "0.37.0", features = ["tokio"] }
reed-solomon-erasure = { version = "6.0.0", features = ["simd-accel"] }
reed-solomon-simd = { version = "3.0.0" }
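The workspace manifest now pins a set of general-purpose compression crates (brotli, flate2, zstd, lz4, snap) that back the `compress_block`/`decompress_block` helpers referenced later in this diff. A minimal sketch of how such a dispatch can look, assuming a simplified two-variant algorithm enum; the real `rustfs_utils::compress` module covers more algorithms and its signatures may differ:

```rust
use std::io::{self, Read};

// Simplified stand-in for rustfs_utils::compress (illustration only).
#[derive(Clone, Copy, Debug)]
pub enum CompressionAlgorithm {
    Deflate,
    Zstd,
}

pub fn compress_block(data: &[u8], algo: CompressionAlgorithm) -> io::Result<Vec<u8>> {
    match algo {
        CompressionAlgorithm::Deflate => {
            // flate2's read adapter yields compressed bytes as it is read.
            let mut out = Vec::new();
            flate2::read::DeflateEncoder::new(data, flate2::Compression::default()).read_to_end(&mut out)?;
            Ok(out)
        }
        CompressionAlgorithm::Zstd => zstd::encode_all(data, 0),
    }
}

pub fn decompress_block(data: &[u8], algo: CompressionAlgorithm) -> io::Result<Vec<u8>> {
    match algo {
        CompressionAlgorithm::Deflate => {
            let mut out = Vec::new();
            flate2::read::DeflateDecoder::new(data).read_to_end(&mut out)?;
            Ok(out)
        }
        CompressionAlgorithm::Zstd => zstd::decode_all(data),
    }
}
```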
5  Makefile
@@ -85,6 +85,11 @@ build-musl:
	@echo "🔨 Building rustfs for x86_64-unknown-linux-musl..."
	cargo build --target x86_64-unknown-linux-musl --bin rustfs -r

.PHONY: build-gnu
build-gnu:
	@echo "🔨 Building rustfs for x86_64-unknown-linux-gnu..."
	cargo build --target x86_64-unknown-linux-gnu --bin rustfs -r

.PHONY: deploy-dev
deploy-dev: build-musl
	@echo "🚀 Deploying to dev server: $${IP}"
@@ -1,5 +1,6 @@
|
||||
use crate::error::{Error, Result};
|
||||
use crate::headers::RESERVED_METADATA_PREFIX_LOWER;
|
||||
use crate::headers::RUSTFS_HEALING;
|
||||
use bytes::Bytes;
|
||||
use rmp_serde::Serializer;
|
||||
use rustfs_utils::HashAlgorithm;
|
||||
@@ -9,9 +10,6 @@ use std::collections::HashMap;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::headers::RESERVED_METADATA_PREFIX;
|
||||
use crate::headers::RUSTFS_HEALING;
|
||||
|
||||
pub const ERASURE_ALGORITHM: &str = "rs-vandermonde";
|
||||
pub const BLOCK_SIZE_V2: usize = 1024 * 1024; // 1M
|
||||
|
||||
@@ -24,10 +22,10 @@ pub struct ObjectPartInfo {
|
||||
pub etag: String,
|
||||
pub number: usize,
|
||||
pub size: usize,
|
||||
pub actual_size: usize, // Original data size
|
||||
pub actual_size: i64, // Original data size
|
||||
pub mod_time: Option<OffsetDateTime>,
|
||||
// Index holds the index of the part in the erasure coding
|
||||
pub index: Option<Vec<u8>>,
|
||||
pub index: Option<Bytes>,
|
||||
// Checksums holds checksums of the part
|
||||
pub checksums: Option<HashMap<String, String>>,
|
||||
}
|
||||
@@ -118,15 +116,21 @@ impl ErasureInfo {
|
||||
}
|
||||
/// Calculate the total erasure file size for a given original size.
|
||||
// Returns the final erasure size from the original size
|
||||
pub fn shard_file_size(&self, total_length: usize) -> usize {
|
||||
pub fn shard_file_size(&self, total_length: i64) -> i64 {
|
||||
if total_length == 0 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if total_length < 0 {
|
||||
return total_length;
|
||||
}
|
||||
|
||||
let total_length = total_length as usize;
|
||||
|
||||
let num_shards = total_length / self.block_size;
|
||||
let last_block_size = total_length % self.block_size;
|
||||
let last_shard_size = calc_shard_size(last_block_size, self.data_blocks);
|
||||
num_shards * self.shard_size() + last_shard_size
|
||||
(num_shards * self.shard_size() + last_shard_size) as i64
|
||||
}
|
||||
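Switching `shard_file_size` to `i64` lets a negative "size unknown" value (as used for compressed objects whose final length is not known up front) pass through unchanged instead of wrapping as `usize`. A worked sketch of the arithmetic, assuming standalone helpers with ceiling division for `calc_shard_size` (the real methods live on `ErasureInfo`):

```rust
// Illustration only: free functions standing in for ErasureInfo::shard_file_size.
fn calc_shard_size(block_size: usize, data_blocks: usize) -> usize {
    // Each data block stores a ceiling-divided share of the block.
    block_size.div_ceil(data_blocks)
}

fn shard_file_size(total_length: i64, block_size: usize, data_blocks: usize) -> i64 {
    if total_length <= 0 {
        // 0 stays 0; negative ("unknown size") values pass through unchanged.
        return total_length;
    }
    let total = total_length as usize;
    let num_shards = total / block_size;
    let last_block = total % block_size;
    let shard = calc_shard_size(block_size, data_blocks);
    (num_shards * shard + calc_shard_size(last_block, data_blocks)) as i64
}

// 2.5 MiB with 4 data blocks and 1 MiB blocks:
// 2 full shards of 262144 bytes plus a 131072-byte tail = 655360 bytes per shard file.
// assert_eq!(shard_file_size(2 * 1024 * 1024 + 512 * 1024, 1 << 20, 4), 655360);
```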
|
||||
/// Check if this ErasureInfo equals another ErasureInfo
|
||||
@@ -156,7 +160,7 @@ pub struct FileInfo {
|
||||
pub expire_restored: bool,
|
||||
pub data_dir: Option<Uuid>,
|
||||
pub mod_time: Option<OffsetDateTime>,
|
||||
pub size: usize,
|
||||
pub size: i64,
|
||||
// File mode bits
|
||||
pub mode: Option<u32>,
|
||||
// WrittenByVersion is the unix time stamp of the version that created this version of the object
|
||||
@@ -255,7 +259,8 @@ impl FileInfo {
|
||||
etag: String,
|
||||
part_size: usize,
|
||||
mod_time: Option<OffsetDateTime>,
|
||||
actual_size: usize,
|
||||
actual_size: i64,
|
||||
index: Option<Bytes>,
|
||||
) {
|
||||
let part = ObjectPartInfo {
|
||||
etag,
|
||||
@@ -263,7 +268,7 @@ impl FileInfo {
|
||||
size: part_size,
|
||||
mod_time,
|
||||
actual_size,
|
||||
index: None,
|
||||
index,
|
||||
checksums: None,
|
||||
};
|
||||
|
||||
@@ -306,6 +311,12 @@ impl FileInfo {
|
||||
self.metadata
|
||||
.insert(format!("{}inline-data", RESERVED_METADATA_PREFIX_LOWER).to_owned(), "true".to_owned());
|
||||
}
|
||||
|
||||
pub fn set_data_moved(&mut self) {
|
||||
self.metadata
|
||||
.insert(format!("{}data-moved", RESERVED_METADATA_PREFIX_LOWER).to_owned(), "true".to_owned());
|
||||
}
|
||||
|
||||
pub fn inline_data(&self) -> bool {
|
||||
self.metadata
|
||||
.contains_key(format!("{}inline-data", RESERVED_METADATA_PREFIX_LOWER).as_str())
|
||||
@@ -315,7 +326,7 @@ impl FileInfo {
|
||||
/// Check if the object is compressed
|
||||
pub fn is_compressed(&self) -> bool {
|
||||
self.metadata
|
||||
.contains_key(&format!("{}compression", RESERVED_METADATA_PREFIX))
|
||||
.contains_key(&format!("{}compression", RESERVED_METADATA_PREFIX_LOWER))
|
||||
}
|
||||
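The corrected `is_compressed` probes the `compression` key under the lowercase reserved prefix, i.e. the same prefix the write path stores it under. A small sketch of that round trip; the concrete prefix value is an assumption, only the key construction mirrors the code above:

```rust
use std::collections::HashMap;

// Assumed value for illustration; the real constant is defined in crate::headers.
const RESERVED_METADATA_PREFIX_LOWER: &str = "x-rustfs-internal-";

fn set_compression(metadata: &mut HashMap<String, String>, algorithm: &str) {
    // Writer records the algorithm under the lowercase internal prefix.
    metadata.insert(format!("{RESERVED_METADATA_PREFIX_LOWER}compression"), algorithm.to_owned());
}

fn is_compressed(metadata: &HashMap<String, String>) -> bool {
    // Must probe the same lowercase prefix the writer used; checking the
    // mixed-case prefix was the mismatch fixed in this hunk.
    metadata.contains_key(&format!("{RESERVED_METADATA_PREFIX_LOWER}compression"))
}

fn main() {
    let mut md = HashMap::new();
    assert!(!is_compressed(&md));
    set_compression(&mut md, "zstd");
    assert!(is_compressed(&md));
}
```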
|
||||
/// Check if the object is remote (transitioned to another tier)
|
||||
@@ -429,7 +440,7 @@ impl FileInfoVersions {
|
||||
}
|
||||
|
||||
/// Calculate the total size of all versions for this object
|
||||
pub fn size(&self) -> usize {
|
||||
pub fn size(&self) -> i64 {
|
||||
self.versions.iter().map(|v| v.size).sum()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ use crate::headers::{
|
||||
RESERVED_METADATA_PREFIX_LOWER, VERSION_PURGE_STATUS_KEY,
|
||||
};
|
||||
use byteorder::ByteOrder;
|
||||
use bytes::Bytes;
|
||||
use rmp::Marker;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::cmp::Ordering;
|
||||
@@ -1379,9 +1380,9 @@ pub struct MetaObject {
|
||||
pub part_numbers: Vec<usize>, // Part Numbers
|
||||
pub part_etags: Vec<String>, // Part ETags
|
||||
pub part_sizes: Vec<usize>, // Part Sizes
|
||||
pub part_actual_sizes: Vec<usize>, // Part ActualSizes (compression)
|
||||
pub part_indices: Vec<Vec<u8>>, // Part Indexes (compression)
|
||||
pub size: usize, // Object version size
|
||||
pub part_actual_sizes: Vec<i64>, // Part ActualSizes (compression)
|
||||
pub part_indices: Vec<Bytes>, // Part Indexes (compression)
|
||||
pub size: i64, // Object version size
|
||||
pub mod_time: Option<OffsetDateTime>, // Object version modified time
|
||||
pub meta_sys: HashMap<String, Vec<u8>>, // Object version internal metadata
|
||||
pub meta_user: HashMap<String, String>, // Object version metadata set by user
|
||||
@@ -1538,7 +1539,7 @@ impl MetaObject {
|
||||
let mut buf = vec![0u8; blen as usize];
|
||||
cur.read_exact(&mut buf)?;
|
||||
|
||||
indices.push(buf);
|
||||
indices.push(Bytes::from(buf));
|
||||
}
|
||||
|
||||
self.part_indices = indices;
|
||||
@@ -1810,13 +1811,16 @@ impl MetaObject {
|
||||
}
|
||||
|
||||
for (k, v) in &self.meta_sys {
|
||||
if k == AMZ_STORAGE_CLASS && v == b"STANDARD" {
|
||||
continue;
|
||||
}
|
||||
|
||||
if k.starts_with(RESERVED_METADATA_PREFIX)
|
||||
|| k.starts_with(RESERVED_METADATA_PREFIX_LOWER)
|
||||
|| k == VERSION_PURGE_STATUS_KEY
|
||||
{
|
||||
continue;
|
||||
metadata.insert(k.to_owned(), String::from_utf8(v.to_owned()).unwrap_or_default());
|
||||
}
|
||||
metadata.insert(k.to_owned(), String::from_utf8(v.to_owned()).unwrap_or_default());
|
||||
}
|
||||
|
||||
// todo: ReplicationState,Delete
|
||||
@@ -2799,13 +2803,13 @@ mod test {
|
||||
|
||||
// 2. Test an extremely large file size
|
||||
let large_object = MetaObject {
|
||||
size: usize::MAX,
|
||||
size: i64::MAX,
|
||||
part_sizes: vec![usize::MAX],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Should be able to handle large values
|
||||
assert_eq!(large_object.size, usize::MAX);
|
||||
assert_eq!(large_object.size, i64::MAX);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -3367,7 +3371,7 @@ pub struct DetailedVersionStats {
|
||||
pub free_versions: usize,
|
||||
pub versions_with_data_dir: usize,
|
||||
pub versions_with_inline_data: usize,
|
||||
pub total_size: usize,
|
||||
pub total_size: i64,
|
||||
pub latest_mod_time: Option<OffsetDateTime>,
|
||||
}
|
||||
|
||||
|
||||
@@ -19,3 +19,5 @@ pub const X_RUSTFS_DATA_MOV: &str = "X-Rustfs-Internal-data-mov";
|
||||
pub const AMZ_OBJECT_TAGGING: &str = "X-Amz-Tagging";
|
||||
pub const AMZ_BUCKET_REPLICATION_STATUS: &str = "X-Amz-Replication-Status";
|
||||
pub const AMZ_DECODED_CONTENT_LENGTH: &str = "X-Amz-Decoded-Content-Length";
|
||||
|
||||
pub const RUSTFS_DATA_MOVE: &str = "X-Rustfs-Internal-data-mov";
|
||||
|
||||
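`AMZ_DECODED_CONTENT_LENGTH` carries the original payload size of an aws-chunked upload, which is what the compression path later records as `actual_size`. A minimal sketch of reading it; the handler wiring around it is assumed:

```rust
use http::HeaderMap;

const AMZ_DECODED_CONTENT_LENGTH: &str = "X-Amz-Decoded-Content-Length";

/// Returns the client-declared original object size, if the header is present and numeric.
fn decoded_content_length(headers: &HeaderMap) -> Option<i64> {
    headers
        .get(AMZ_DECODED_CONTENT_LENGTH)?
        .to_str()
        .ok()?
        .parse()
        .ok()
}
```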
@@ -91,7 +91,7 @@ pub fn create_complex_xlmeta() -> Result<Vec<u8>> {
|
||||
let mut fm = FileMeta::new();
|
||||
|
||||
// Create 10 versions of the object
|
||||
for i in 0..10 {
|
||||
for i in 0i64..10i64 {
|
||||
let version_id = Uuid::new_v4();
|
||||
let data_dir = if i % 3 == 0 { Some(Uuid::new_v4()) } else { None };
|
||||
|
||||
@@ -113,9 +113,9 @@ pub fn create_complex_xlmeta() -> Result<Vec<u8>> {
|
||||
part_numbers: vec![1],
|
||||
part_etags: vec![format!("etag-{:08x}", i)],
|
||||
part_sizes: vec![1024 * (i + 1) as usize],
|
||||
part_actual_sizes: vec![1024 * (i + 1) as usize],
|
||||
part_actual_sizes: vec![1024 * (i + 1)],
|
||||
part_indices: Vec::new(),
|
||||
size: 1024 * (i + 1) as usize,
|
||||
size: 1024 * (i + 1),
|
||||
mod_time: Some(OffsetDateTime::from_unix_timestamp(1705312200 + i * 60)?),
|
||||
meta_sys: HashMap::new(),
|
||||
meta_user: metadata,
|
||||
@@ -221,7 +221,7 @@ pub fn create_xlmeta_with_inline_data() -> Result<Vec<u8>> {
|
||||
part_sizes: vec![inline_data.len()],
|
||||
part_actual_sizes: Vec::new(),
|
||||
part_indices: Vec::new(),
|
||||
size: inline_data.len(),
|
||||
size: inline_data.len() as i64,
|
||||
mod_time: Some(OffsetDateTime::now_utc()),
|
||||
meta_sys: HashMap::new(),
|
||||
meta_user: HashMap::new(),
|
||||
|
||||
@@ -14,23 +14,20 @@ tokio = { workspace = true, features = ["full"] }
|
||||
rand = { workspace = true }
|
||||
md-5 = { workspace = true }
|
||||
http.workspace = true
|
||||
flate2 = "1.1.1"
|
||||
aes-gcm = "0.10.3"
|
||||
crc32fast = "1.4.2"
|
||||
pin-project-lite.workspace = true
|
||||
async-trait.workspace = true
|
||||
base64-simd = "0.8.0"
|
||||
hex-simd = "0.8.0"
|
||||
zstd = "0.13.3"
|
||||
lz4 = "1.28.1"
|
||||
brotli = "8.0.1"
|
||||
snap = "1.1.1"
|
||||
|
||||
serde = { workspace = true }
|
||||
bytes.workspace = true
|
||||
reqwest.workspace = true
|
||||
tokio-util.workspace = true
|
||||
futures.workspace = true
|
||||
rustfs-utils = {workspace = true, features= ["io","hash"]}
|
||||
rustfs-utils = {workspace = true, features= ["io","hash","compress"]}
|
||||
byteorder.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = ["async", "async_tokio", "tokio"] }
|
||||
|
||||
672  crates/rio/src/compress_index.rs (new file)
@@ -0,0 +1,672 @@
|
||||
use bytes::Bytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::io::{self, Read, Seek, SeekFrom};
|
||||
|
||||
const S2_INDEX_HEADER: &[u8] = b"s2idx\x00";
|
||||
const S2_INDEX_TRAILER: &[u8] = b"\x00xdi2s";
|
||||
const MAX_INDEX_ENTRIES: usize = 1 << 16;
|
||||
const MIN_INDEX_DIST: i64 = 1 << 20;
|
||||
// const MIN_INDEX_DIST: i64 = 0;
|
||||
|
||||
pub trait TryGetIndex {
|
||||
fn try_get_index(&self) -> Option<&Index> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Index {
|
||||
pub total_uncompressed: i64,
|
||||
pub total_compressed: i64,
|
||||
info: Vec<IndexInfo>,
|
||||
est_block_uncomp: i64,
|
||||
}
|
||||
|
||||
impl Default for Index {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct IndexInfo {
|
||||
pub compressed_offset: i64,
|
||||
pub uncompressed_offset: i64,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl Index {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
total_uncompressed: -1,
|
||||
total_compressed: -1,
|
||||
info: Vec::new(),
|
||||
est_block_uncomp: 0,
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn reset(&mut self, max_block: usize) {
|
||||
self.est_block_uncomp = max_block as i64;
|
||||
self.total_compressed = -1;
|
||||
self.total_uncompressed = -1;
|
||||
self.info.clear();
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.info.len()
|
||||
}
|
||||
|
||||
fn alloc_infos(&mut self, n: usize) {
|
||||
if n > MAX_INDEX_ENTRIES {
|
||||
panic!("n > MAX_INDEX_ENTRIES");
|
||||
}
|
||||
self.info = Vec::with_capacity(n);
|
||||
}
|
||||
|
||||
pub fn add(&mut self, compressed_offset: i64, uncompressed_offset: i64) -> io::Result<()> {
|
||||
if self.info.is_empty() {
|
||||
self.info.push(IndexInfo {
|
||||
compressed_offset,
|
||||
uncompressed_offset,
|
||||
});
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let last_idx = self.info.len() - 1;
|
||||
let latest = &mut self.info[last_idx];
|
||||
|
||||
if latest.uncompressed_offset == uncompressed_offset {
|
||||
latest.compressed_offset = compressed_offset;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if latest.uncompressed_offset > uncompressed_offset {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
format!(
|
||||
"internal error: Earlier uncompressed received ({} > {})",
|
||||
latest.uncompressed_offset, uncompressed_offset
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
if latest.compressed_offset > compressed_offset {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidData,
|
||||
format!(
|
||||
"internal error: Earlier compressed received ({} > {})",
|
||||
latest.compressed_offset, compressed_offset
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
if latest.uncompressed_offset + MIN_INDEX_DIST > uncompressed_offset {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
self.info.push(IndexInfo {
|
||||
compressed_offset,
|
||||
uncompressed_offset,
|
||||
});
|
||||
|
||||
self.total_compressed = compressed_offset;
|
||||
self.total_uncompressed = uncompressed_offset;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn find(&self, offset: i64) -> io::Result<(i64, i64)> {
|
||||
if self.total_uncompressed < 0 {
|
||||
return Err(io::Error::other("corrupt index"));
|
||||
}
|
||||
|
||||
let mut offset = offset;
|
||||
if offset < 0 {
|
||||
offset += self.total_uncompressed;
|
||||
if offset < 0 {
|
||||
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "offset out of bounds"));
|
||||
}
|
||||
}
|
||||
|
||||
if offset > self.total_uncompressed {
|
||||
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "offset out of bounds"));
|
||||
}
|
||||
|
||||
if self.info.is_empty() {
|
||||
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "empty index"));
|
||||
}
|
||||
|
||||
if self.info.len() > 200 {
|
||||
let n = self
|
||||
.info
|
||||
.binary_search_by(|info| {
|
||||
if info.uncompressed_offset > offset {
|
||||
std::cmp::Ordering::Greater
|
||||
} else {
|
||||
std::cmp::Ordering::Less
|
||||
}
|
||||
})
|
||||
.unwrap_or_else(|i| i);
|
||||
|
||||
if n == 0 {
|
||||
return Ok((self.info[0].compressed_offset, self.info[0].uncompressed_offset));
|
||||
}
|
||||
return Ok((self.info[n - 1].compressed_offset, self.info[n - 1].uncompressed_offset));
|
||||
}
|
||||
|
||||
let mut compressed_off = 0;
|
||||
let mut uncompressed_off = 0;
|
||||
for info in &self.info {
|
||||
if info.uncompressed_offset > offset {
|
||||
break;
|
||||
}
|
||||
compressed_off = info.compressed_offset;
|
||||
uncompressed_off = info.uncompressed_offset;
|
||||
}
|
||||
Ok((compressed_off, uncompressed_off))
|
||||
}
|
||||
|
||||
fn reduce(&mut self) {
|
||||
if self.info.len() < MAX_INDEX_ENTRIES && self.est_block_uncomp >= MIN_INDEX_DIST {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut remove_n = (self.info.len() + 1) / MAX_INDEX_ENTRIES;
|
||||
let src = self.info.clone();
|
||||
let mut j = 0;
|
||||
|
||||
while self.est_block_uncomp * (remove_n as i64 + 1) < MIN_INDEX_DIST && self.info.len() / (remove_n + 1) > 1000 {
|
||||
remove_n += 1;
|
||||
}
|
||||
|
||||
let mut idx = 0;
|
||||
while idx < src.len() {
|
||||
self.info[j] = src[idx].clone();
|
||||
j += 1;
|
||||
idx += remove_n + 1;
|
||||
}
|
||||
self.info.truncate(j);
|
||||
self.est_block_uncomp += self.est_block_uncomp * remove_n as i64;
|
||||
}
|
||||
|
||||
pub fn into_vec(mut self) -> Bytes {
|
||||
let mut b = Vec::new();
|
||||
self.append_to(&mut b, self.total_uncompressed, self.total_compressed);
|
||||
Bytes::from(b)
|
||||
}
|
||||
|
||||
pub fn append_to(&mut self, b: &mut Vec<u8>, uncomp_total: i64, comp_total: i64) {
|
||||
self.reduce();
|
||||
let init_size = b.len();
|
||||
|
||||
// Add skippable header
|
||||
b.extend_from_slice(&[0x50, 0x2A, 0x4D, 0x18]); // ChunkTypeIndex
|
||||
b.extend_from_slice(&[0, 0, 0]); // Placeholder for chunk length
|
||||
|
||||
// Add header
|
||||
b.extend_from_slice(S2_INDEX_HEADER);
|
||||
|
||||
// Add total sizes
|
||||
let mut tmp = [0u8; 8];
|
||||
let n = write_varint(&mut tmp, uncomp_total);
|
||||
b.extend_from_slice(&tmp[..n]);
|
||||
let n = write_varint(&mut tmp, comp_total);
|
||||
b.extend_from_slice(&tmp[..n]);
|
||||
let n = write_varint(&mut tmp, self.est_block_uncomp);
|
||||
b.extend_from_slice(&tmp[..n]);
|
||||
let n = write_varint(&mut tmp, self.info.len() as i64);
|
||||
b.extend_from_slice(&tmp[..n]);
|
||||
|
||||
// Check if we should add uncompressed offsets
|
||||
let mut has_uncompressed = 0u8;
|
||||
for (idx, info) in self.info.iter().enumerate() {
|
||||
if idx == 0 {
|
||||
if info.uncompressed_offset != 0 {
|
||||
has_uncompressed = 1;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if info.uncompressed_offset != self.info[idx - 1].uncompressed_offset + self.est_block_uncomp {
|
||||
has_uncompressed = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
b.push(has_uncompressed);
|
||||
|
||||
// Add uncompressed offsets if needed
|
||||
if has_uncompressed == 1 {
|
||||
for (idx, info) in self.info.iter().enumerate() {
|
||||
let mut u_off = info.uncompressed_offset;
|
||||
if idx > 0 {
|
||||
let prev = &self.info[idx - 1];
|
||||
u_off -= prev.uncompressed_offset + self.est_block_uncomp;
|
||||
}
|
||||
let n = write_varint(&mut tmp, u_off);
|
||||
b.extend_from_slice(&tmp[..n]);
|
||||
}
|
||||
}
|
||||
|
||||
// Add compressed offsets
|
||||
let mut c_predict = self.est_block_uncomp / 2;
|
||||
for (idx, info) in self.info.iter().enumerate() {
|
||||
let mut c_off = info.compressed_offset;
|
||||
if idx > 0 {
|
||||
let prev = &self.info[idx - 1];
|
||||
c_off -= prev.compressed_offset + c_predict;
|
||||
c_predict += c_off / 2;
|
||||
}
|
||||
let n = write_varint(&mut tmp, c_off);
|
||||
b.extend_from_slice(&tmp[..n]);
|
||||
}
|
||||
|
||||
// Add total size and trailer
|
||||
let total_size = (b.len() - init_size + 4 + S2_INDEX_TRAILER.len()) as u32;
|
||||
b.extend_from_slice(&total_size.to_le_bytes());
|
||||
b.extend_from_slice(S2_INDEX_TRAILER);
|
||||
|
||||
// Update chunk length
|
||||
let chunk_len = b.len() - init_size - 4;
|
||||
b[init_size + 1] = chunk_len as u8;
|
||||
b[init_size + 2] = (chunk_len >> 8) as u8;
|
||||
b[init_size + 3] = (chunk_len >> 16) as u8;
|
||||
}
|
||||
|
||||
pub fn load<'a>(&mut self, mut b: &'a [u8]) -> io::Result<&'a [u8]> {
|
||||
if b.len() <= 4 + S2_INDEX_HEADER.len() + S2_INDEX_TRAILER.len() {
|
||||
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "buffer too small"));
|
||||
}
|
||||
|
||||
if b[0] != 0x50 || b[1] != 0x2A || b[2] != 0x4D || b[3] != 0x18 {
|
||||
return Err(io::Error::other("invalid chunk type"));
|
||||
}
|
||||
|
||||
let chunk_len = (b[1] as usize) | ((b[2] as usize) << 8) | ((b[3] as usize) << 16);
|
||||
b = &b[4..];
|
||||
|
||||
if b.len() < chunk_len {
|
||||
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "buffer too small"));
|
||||
}
|
||||
|
||||
if !b.starts_with(S2_INDEX_HEADER) {
|
||||
return Err(io::Error::other("invalid header"));
|
||||
}
|
||||
b = &b[S2_INDEX_HEADER.len()..];
|
||||
|
||||
// Read total uncompressed
|
||||
let (v, n) = read_varint(b)?;
|
||||
if v < 0 {
|
||||
return Err(io::Error::other("invalid uncompressed size"));
|
||||
}
|
||||
self.total_uncompressed = v;
|
||||
b = &b[n..];
|
||||
|
||||
// Read total compressed
|
||||
let (v, n) = read_varint(b)?;
|
||||
if v < 0 {
|
||||
return Err(io::Error::other("invalid compressed size"));
|
||||
}
|
||||
self.total_compressed = v;
|
||||
b = &b[n..];
|
||||
|
||||
// Read est block uncomp
|
||||
let (v, n) = read_varint(b)?;
|
||||
if v < 0 {
|
||||
return Err(io::Error::other("invalid block size"));
|
||||
}
|
||||
self.est_block_uncomp = v;
|
||||
b = &b[n..];
|
||||
|
||||
// Read number of entries
|
||||
let (v, n) = read_varint(b)?;
|
||||
if v < 0 || v > MAX_INDEX_ENTRIES as i64 {
|
||||
return Err(io::Error::other("invalid number of entries"));
|
||||
}
|
||||
let entries = v as usize;
|
||||
b = &b[n..];
|
||||
|
||||
self.alloc_infos(entries);
|
||||
|
||||
if b.is_empty() {
|
||||
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "buffer too small"));
|
||||
}
|
||||
|
||||
let has_uncompressed = b[0];
|
||||
b = &b[1..];
|
||||
|
||||
if has_uncompressed & 1 != has_uncompressed {
|
||||
return Err(io::Error::other("invalid uncompressed flag"));
|
||||
}
|
||||
|
||||
// Read uncompressed offsets
|
||||
for idx in 0..entries {
|
||||
let mut u_off = 0i64;
|
||||
if has_uncompressed != 0 {
|
||||
let (v, n) = read_varint(b)?;
|
||||
u_off = v;
|
||||
b = &b[n..];
|
||||
}
|
||||
|
||||
if idx > 0 {
|
||||
let prev = self.info[idx - 1].uncompressed_offset;
|
||||
u_off += prev + self.est_block_uncomp;
|
||||
if u_off <= prev {
|
||||
return Err(io::Error::other("invalid offset"));
|
||||
}
|
||||
}
|
||||
if u_off < 0 {
|
||||
return Err(io::Error::other("negative offset"));
|
||||
}
|
||||
self.info[idx].uncompressed_offset = u_off;
|
||||
}
|
||||
|
||||
// Read compressed offsets
|
||||
let mut c_predict = self.est_block_uncomp / 2;
|
||||
for idx in 0..entries {
|
||||
let (v, n) = read_varint(b)?;
|
||||
let mut c_off = v;
|
||||
b = &b[n..];
|
||||
|
||||
if idx > 0 {
|
||||
c_predict += c_off / 2;
|
||||
let prev = self.info[idx - 1].compressed_offset;
|
||||
c_off += prev + c_predict;
|
||||
if c_off <= prev {
|
||||
return Err(io::Error::other("invalid offset"));
|
||||
}
|
||||
}
|
||||
if c_off < 0 {
|
||||
return Err(io::Error::other("negative offset"));
|
||||
}
|
||||
self.info[idx].compressed_offset = c_off;
|
||||
}
|
||||
|
||||
if b.len() < 4 + S2_INDEX_TRAILER.len() {
|
||||
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "buffer too small"));
|
||||
}
|
||||
|
||||
// Skip size
|
||||
b = &b[4..];
|
||||
|
||||
// Check trailer
|
||||
if !b.starts_with(S2_INDEX_TRAILER) {
|
||||
return Err(io::Error::other("invalid trailer"));
|
||||
}
|
||||
|
||||
Ok(&b[S2_INDEX_TRAILER.len()..])
|
||||
}
|
||||
|
||||
pub fn load_stream<R: Read + Seek>(&mut self, mut rs: R) -> io::Result<()> {
|
||||
// Go to end
|
||||
rs.seek(SeekFrom::End(-10))?;
|
||||
let mut tmp = [0u8; 10];
|
||||
rs.read_exact(&mut tmp)?;
|
||||
|
||||
// Check trailer
|
||||
if &tmp[4..4 + S2_INDEX_TRAILER.len()] != S2_INDEX_TRAILER {
|
||||
return Err(io::Error::other("invalid trailer"));
|
||||
}
|
||||
|
||||
let sz = u32::from_le_bytes(tmp[..4].try_into().unwrap());
|
||||
if sz > 0x7fffffff {
|
||||
return Err(io::Error::other("size too large"));
|
||||
}
|
||||
|
||||
rs.seek(SeekFrom::End(-(sz as i64)))?;
|
||||
|
||||
let mut buf = vec![0u8; sz as usize];
|
||||
rs.read_exact(&mut buf)?;
|
||||
|
||||
self.load(&buf)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn to_json(&self) -> serde_json::Result<Vec<u8>> {
|
||||
#[derive(Serialize)]
|
||||
struct Offset {
|
||||
compressed: i64,
|
||||
uncompressed: i64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct IndexJson {
|
||||
total_uncompressed: i64,
|
||||
total_compressed: i64,
|
||||
offsets: Vec<Offset>,
|
||||
est_block_uncompressed: i64,
|
||||
}
|
||||
|
||||
let json = IndexJson {
|
||||
total_uncompressed: self.total_uncompressed,
|
||||
total_compressed: self.total_compressed,
|
||||
offsets: self
|
||||
.info
|
||||
.iter()
|
||||
.map(|info| Offset {
|
||||
compressed: info.compressed_offset,
|
||||
uncompressed: info.uncompressed_offset,
|
||||
})
|
||||
.collect(),
|
||||
est_block_uncompressed: self.est_block_uncomp,
|
||||
};
|
||||
|
||||
serde_json::to_vec_pretty(&json)
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions for varint encoding/decoding
|
||||
fn write_varint(buf: &mut [u8], mut v: i64) -> usize {
|
||||
let mut n = 0;
|
||||
while v >= 0x80 {
|
||||
buf[n] = (v as u8) | 0x80;
|
||||
v >>= 7;
|
||||
n += 1;
|
||||
}
|
||||
buf[n] = v as u8;
|
||||
n + 1
|
||||
}
|
||||
|
||||
fn read_varint(buf: &[u8]) -> io::Result<(i64, usize)> {
|
||||
let mut result = 0i64;
|
||||
let mut shift = 0;
|
||||
let mut n = 0;
|
||||
|
||||
while n < buf.len() {
|
||||
let byte = buf[n];
|
||||
n += 1;
|
||||
result |= ((byte & 0x7F) as i64) << shift;
|
||||
if byte < 0x80 {
|
||||
return Ok((result, n));
|
||||
}
|
||||
shift += 7;
|
||||
}
|
||||
|
||||
Err(io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected EOF"))
|
||||
}
|
||||
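The two helpers above implement the usual unsigned LEB128 layout: seven payload bits per byte, low bits first, with the high bit marking a continuation byte. A quick sanity check that could sit in the test module below:

```rust
#[test]
fn varint_round_trip() {
    // 300 = 0b1_0010_1100 encodes as [0xAC, 0x02]: low 7 bits with the
    // continuation bit set, then the remaining bits.
    let mut buf = [0u8; 8];
    let n = write_varint(&mut buf, 300);
    assert_eq!(&buf[..n], &[0xAC, 0x02]);
    assert_eq!(read_varint(&buf[..n]).unwrap(), (300, 2));
}
```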
|
||||
// Helper functions for index header manipulation
|
||||
#[allow(dead_code)]
|
||||
pub fn remove_index_headers(b: &[u8]) -> Option<&[u8]> {
|
||||
if b.len() < 4 + S2_INDEX_TRAILER.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Skip size
|
||||
let b = &b[4..];
|
||||
|
||||
// Check trailer
|
||||
if !b.starts_with(S2_INDEX_TRAILER) {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(&b[S2_INDEX_TRAILER.len()..])
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn restore_index_headers(in_data: &[u8]) -> Vec<u8> {
|
||||
if in_data.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let mut b = Vec::with_capacity(4 + S2_INDEX_HEADER.len() + in_data.len() + S2_INDEX_TRAILER.len() + 4);
|
||||
b.extend_from_slice(&[0x50, 0x2A, 0x4D, 0x18]);
|
||||
b.extend_from_slice(S2_INDEX_HEADER);
|
||||
b.extend_from_slice(in_data);
|
||||
|
||||
let total_size = (b.len() + 4 + S2_INDEX_TRAILER.len()) as u32;
|
||||
b.extend_from_slice(&total_size.to_le_bytes());
|
||||
b.extend_from_slice(S2_INDEX_TRAILER);
|
||||
|
||||
let chunk_len = b.len() - 4;
|
||||
b[1] = chunk_len as u8;
|
||||
b[2] = (chunk_len >> 8) as u8;
|
||||
b[3] = (chunk_len >> 16) as u8;
|
||||
|
||||
b
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_index_new() {
|
||||
let index = Index::new();
|
||||
assert_eq!(index.total_uncompressed, -1);
|
||||
assert_eq!(index.total_compressed, -1);
|
||||
assert!(index.info.is_empty());
|
||||
assert_eq!(index.est_block_uncomp, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_index_add() -> io::Result<()> {
|
||||
let mut index = Index::new();
|
||||
|
||||
// Test adding the first index entry
|
||||
index.add(100, 1000)?;
|
||||
assert_eq!(index.info.len(), 1);
|
||||
assert_eq!(index.info[0].compressed_offset, 100);
|
||||
assert_eq!(index.info[0].uncompressed_offset, 1000);
|
||||
|
||||
// Test adding an entry with the same uncompressed offset
|
||||
index.add(200, 1000)?;
|
||||
assert_eq!(index.info.len(), 1);
|
||||
assert_eq!(index.info[0].compressed_offset, 200);
|
||||
assert_eq!(index.info[0].uncompressed_offset, 1000);
|
||||
|
||||
// Test adding a new entry (make sure the distance is large enough)
|
||||
index.add(300, 2000 + MIN_INDEX_DIST)?;
|
||||
assert_eq!(index.info.len(), 2);
|
||||
assert_eq!(index.info[1].compressed_offset, 300);
|
||||
assert_eq!(index.info[1].uncompressed_offset, 2000 + MIN_INDEX_DIST);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_index_add_errors() {
|
||||
let mut index = Index::new();
|
||||
|
||||
// Add an initial entry
|
||||
index.add(100, 1000).unwrap();
|
||||
|
||||
// Test adding a smaller uncompressed offset
|
||||
let err = index.add(200, 500).unwrap_err();
|
||||
assert_eq!(err.kind(), io::ErrorKind::InvalidData);
|
||||
|
||||
// Test adding a smaller compressed offset
|
||||
let err = index.add(50, 2000).unwrap_err();
|
||||
assert_eq!(err.kind(), io::ErrorKind::InvalidData);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_index_find() -> io::Result<()> {
|
||||
let mut index = Index::new();
|
||||
index.total_uncompressed = 1000 + MIN_INDEX_DIST * 3;
|
||||
index.total_compressed = 5000;
|
||||
|
||||
// Add some test data, ensuring entry spacing satisfies the MIN_INDEX_DIST requirement
|
||||
index.add(100, 1000)?;
|
||||
index.add(300, 1000 + MIN_INDEX_DIST)?;
|
||||
index.add(500, 1000 + MIN_INDEX_DIST * 2)?;
|
||||
|
||||
// Test finding an existing offset
|
||||
let (comp, uncomp) = index.find(1500)?;
|
||||
assert_eq!(comp, 100);
|
||||
assert_eq!(uncomp, 1000);
|
||||
|
||||
// Test finding a boundary value
|
||||
let (comp, uncomp) = index.find(1000 + MIN_INDEX_DIST)?;
|
||||
assert_eq!(comp, 300);
|
||||
assert_eq!(uncomp, 1000 + MIN_INDEX_DIST);
|
||||
|
||||
// Test finding the last entry
|
||||
let (comp, uncomp) = index.find(1000 + MIN_INDEX_DIST * 2)?;
|
||||
assert_eq!(comp, 500);
|
||||
assert_eq!(uncomp, 1000 + MIN_INDEX_DIST * 2);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_index_find_errors() {
|
||||
let mut index = Index::new();
|
||||
index.total_uncompressed = 10000;
|
||||
index.total_compressed = 5000;
|
||||
|
||||
// Test an uninitialized index
|
||||
let uninit_index = Index::new();
|
||||
let err = uninit_index.find(1000).unwrap_err();
|
||||
assert_eq!(err.kind(), io::ErrorKind::Other);
|
||||
|
||||
// Test an out-of-range offset
|
||||
let err = index.find(15000).unwrap_err();
|
||||
assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof);
|
||||
|
||||
// Test a negative offset
|
||||
let err = match index.find(-1000) {
|
||||
Ok(_) => panic!("should be error"),
|
||||
Err(e) => e,
|
||||
};
|
||||
assert_eq!(err.kind(), io::ErrorKind::UnexpectedEof);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_index_reduce() {
|
||||
let mut index = Index::new();
|
||||
index.est_block_uncomp = MIN_INDEX_DIST;
|
||||
|
||||
// Add more entries than the maximum index count, keeping spacing at MIN_INDEX_DIST
|
||||
for i in 0..MAX_INDEX_ENTRIES + 100 {
|
||||
index.add(i as i64 * 100, i as i64 * MIN_INDEX_DIST).unwrap();
|
||||
}
|
||||
|
||||
// Call reduce manually
|
||||
index.reduce();
|
||||
|
||||
// Verify the number of entries was reduced correctly
|
||||
assert!(index.info.len() <= MAX_INDEX_ENTRIES);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_index_json() -> io::Result<()> {
|
||||
let mut index = Index::new();
|
||||
|
||||
// Add some test data
|
||||
index.add(100, 1000)?;
|
||||
index.add(300, 2000 + MIN_INDEX_DIST)?;
|
||||
|
||||
// Test JSON serialization
|
||||
let json = index.to_json().unwrap();
|
||||
let json_str = String::from_utf8(json).unwrap();
|
||||
|
||||
println!("json_str: {}", json_str);
|
||||
// Verify the JSON content
|
||||
|
||||
assert!(json_str.contains("\"compressed\": 100"));
|
||||
assert!(json_str.contains("\"uncompressed\": 1000"));
|
||||
assert!(json_str.contains("\"est_block_uncompressed\": 0"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
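Taken together, the new `compress_index` module is an S2-style seek index: `add` records (compressed, uncompressed) offset pairs while blocks are written, `append_to`/`into_vec` serialize them into a skippable frame, and `find` maps an uncompressed offset back to the nearest recorded pair so ranged reads can seek into the compressed stream. A short usage sketch; the offsets are made up and the import path is assumed:

```rust
use std::io;

// Assumes `Index` is reachable as rustfs_rio::compress_index::Index (path assumption).
fn seek_with_index() -> io::Result<()> {
    const MIN_INDEX_DIST: i64 = 1 << 20; // add() drops entries spaced closer than this

    let mut index = Index::new();
    // Record (compressed, uncompressed) offset pairs as blocks are written.
    index.add(0, 0)?;
    index.add(400_000, MIN_INDEX_DIST)?;
    index.add(800_000, 2 * MIN_INDEX_DIST)?;

    // find() returns the nearest recorded pair at or before the requested
    // uncompressed offset; the remaining bytes are skipped after decompressing
    // from the returned compressed offset.
    let (compressed_off, uncompressed_off) = index.find(MIN_INDEX_DIST + 123)?;
    assert_eq!((compressed_off, uncompressed_off), (400_000, MIN_INDEX_DIST));
    Ok(())
}
```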
@@ -1,12 +1,22 @@
|
||||
use crate::compress::{CompressionAlgorithm, compress_block, decompress_block};
|
||||
use crate::compress_index::{Index, TryGetIndex};
|
||||
use crate::{EtagResolvable, HashReaderDetector};
|
||||
use crate::{HashReaderMut, Reader};
|
||||
use pin_project_lite::pin_project;
|
||||
use rustfs_utils::{put_uvarint, put_uvarint_len, uvarint};
|
||||
use rustfs_utils::compress::{CompressionAlgorithm, compress_block, decompress_block};
|
||||
use rustfs_utils::{put_uvarint, uvarint};
|
||||
use std::cmp::min;
|
||||
use std::io::{self};
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::{AsyncRead, ReadBuf};
|
||||
// use tracing::error;
|
||||
|
||||
const COMPRESS_TYPE_COMPRESSED: u8 = 0x00;
|
||||
const COMPRESS_TYPE_UNCOMPRESSED: u8 = 0x01;
|
||||
const COMPRESS_TYPE_END: u8 = 0xFF;
|
||||
|
||||
const DEFAULT_BLOCK_SIZE: usize = 1 << 20; // 1MB
|
||||
const HEADER_LEN: usize = 8;
|
||||
|
||||
pin_project! {
|
||||
#[derive(Debug)]
|
||||
@@ -19,6 +29,11 @@ pin_project! {
|
||||
done: bool,
|
||||
block_size: usize,
|
||||
compression_algorithm: CompressionAlgorithm,
|
||||
index: Index,
|
||||
written: usize,
|
||||
uncomp_written: usize,
|
||||
temp_buffer: Vec<u8>,
|
||||
temp_pos: usize,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,7 +48,12 @@ where
|
||||
pos: 0,
|
||||
done: false,
|
||||
compression_algorithm,
|
||||
block_size: 1 << 20, // Default 1MB
|
||||
block_size: DEFAULT_BLOCK_SIZE,
|
||||
index: Index::new(),
|
||||
written: 0,
|
||||
uncomp_written: 0,
|
||||
temp_buffer: Vec::with_capacity(DEFAULT_BLOCK_SIZE), // Pre-allocate capacity
|
||||
temp_pos: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,19 +66,33 @@ where
|
||||
done: false,
|
||||
compression_algorithm,
|
||||
block_size,
|
||||
index: Index::new(),
|
||||
written: 0,
|
||||
uncomp_written: 0,
|
||||
temp_buffer: Vec::with_capacity(block_size),
|
||||
temp_pos: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> TryGetIndex for CompressReader<R>
|
||||
where
|
||||
R: Reader,
|
||||
{
|
||||
fn try_get_index(&self) -> Option<&Index> {
|
||||
Some(&self.index)
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> AsyncRead for CompressReader<R>
|
||||
where
|
||||
R: AsyncRead + Unpin + Send + Sync,
|
||||
{
|
||||
fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<io::Result<()>> {
|
||||
let mut this = self.project();
|
||||
// If buffer has data, serve from buffer first
|
||||
// Copy from buffer first if available
|
||||
if *this.pos < this.buffer.len() {
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len() - *this.pos);
|
||||
let to_copy = min(buf.remaining(), this.buffer.len() - *this.pos);
|
||||
buf.put_slice(&this.buffer[*this.pos..*this.pos + to_copy]);
|
||||
*this.pos += to_copy;
|
||||
if *this.pos == this.buffer.len() {
|
||||
@@ -67,74 +101,60 @@ where
|
||||
}
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
|
||||
if *this.done {
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
|
||||
// Read from inner, only read block_size bytes each time
|
||||
let mut temp = vec![0u8; *this.block_size];
|
||||
let mut temp_buf = ReadBuf::new(&mut temp);
|
||||
match this.inner.as_mut().poll_read(cx, &mut temp_buf) {
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(Ok(())) => {
|
||||
let n = temp_buf.filled().len();
|
||||
if n == 0 {
|
||||
// EOF, write end header
|
||||
let mut header = [0u8; 8];
|
||||
header[0] = 0xFF;
|
||||
*this.buffer = header.to_vec();
|
||||
*this.pos = 0;
|
||||
*this.done = true;
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len());
|
||||
buf.put_slice(&this.buffer[..to_copy]);
|
||||
*this.pos += to_copy;
|
||||
Poll::Ready(Ok(()))
|
||||
} else {
|
||||
let uncompressed_data = &temp_buf.filled()[..n];
|
||||
|
||||
let crc = crc32fast::hash(uncompressed_data);
|
||||
let compressed_data = compress_block(uncompressed_data, *this.compression_algorithm);
|
||||
|
||||
let uncompressed_len = n;
|
||||
let compressed_len = compressed_data.len();
|
||||
let int_len = put_uvarint_len(uncompressed_len as u64);
|
||||
|
||||
let len = compressed_len + int_len + 4; // 4 bytes for CRC32
|
||||
|
||||
// Header: 8 bytes
|
||||
// 0: type (0 = compressed, 1 = uncompressed, 0xFF = end)
|
||||
// 1-3: length (little endian u24)
|
||||
// 4-7: crc32 (little endian u32)
|
||||
let mut header = [0u8; 8];
|
||||
header[0] = 0x00; // 0 = compressed
|
||||
header[1] = (len & 0xFF) as u8;
|
||||
header[2] = ((len >> 8) & 0xFF) as u8;
|
||||
header[3] = ((len >> 16) & 0xFF) as u8;
|
||||
header[4] = (crc & 0xFF) as u8;
|
||||
header[5] = ((crc >> 8) & 0xFF) as u8;
|
||||
header[6] = ((crc >> 16) & 0xFF) as u8;
|
||||
header[7] = ((crc >> 24) & 0xFF) as u8;
|
||||
|
||||
// Combine header(4+4) + uncompressed_len + compressed
|
||||
let mut out = Vec::with_capacity(len + 4);
|
||||
out.extend_from_slice(&header);
|
||||
|
||||
let mut uncompressed_len_buf = vec![0u8; int_len];
|
||||
put_uvarint(&mut uncompressed_len_buf, uncompressed_len as u64);
|
||||
out.extend_from_slice(&uncompressed_len_buf);
|
||||
|
||||
out.extend_from_slice(&compressed_data);
|
||||
|
||||
*this.buffer = out;
|
||||
*this.pos = 0;
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len());
|
||||
buf.put_slice(&this.buffer[..to_copy]);
|
||||
*this.pos += to_copy;
|
||||
Poll::Ready(Ok(()))
|
||||
// Fill temporary buffer
|
||||
while this.temp_buffer.len() < *this.block_size {
|
||||
let remaining = *this.block_size - this.temp_buffer.len();
|
||||
let mut temp = vec![0u8; remaining];
|
||||
let mut temp_buf = ReadBuf::new(&mut temp);
|
||||
match this.inner.as_mut().poll_read(cx, &mut temp_buf) {
|
||||
Poll::Pending => {
|
||||
if this.temp_buffer.is_empty() {
|
||||
return Poll::Pending;
|
||||
}
|
||||
break;
|
||||
}
|
||||
Poll::Ready(Ok(())) => {
|
||||
let n = temp_buf.filled().len();
|
||||
if n == 0 {
|
||||
if this.temp_buffer.is_empty() {
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
this.temp_buffer.extend_from_slice(&temp[..n]);
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
// error!("CompressReader poll_read: read inner error: {e}");
|
||||
return Poll::Ready(Err(e));
|
||||
}
|
||||
}
|
||||
Poll::Ready(Err(e)) => Poll::Ready(Err(e)),
|
||||
}
|
||||
// Process accumulated data
|
||||
if !this.temp_buffer.is_empty() {
|
||||
let uncompressed_data = &this.temp_buffer;
|
||||
let out = build_compressed_block(uncompressed_data, *this.compression_algorithm);
|
||||
*this.written += out.len();
|
||||
*this.uncomp_written += uncompressed_data.len();
|
||||
if let Err(e) = this.index.add(*this.written as i64, *this.uncomp_written as i64) {
|
||||
// error!("CompressReader index add error: {e}");
|
||||
return Poll::Ready(Err(e));
|
||||
}
|
||||
*this.buffer = out;
|
||||
*this.pos = 0;
|
||||
this.temp_buffer.truncate(0); // More efficient way to clear
|
||||
let to_copy = min(buf.remaining(), this.buffer.len());
|
||||
buf.put_slice(&this.buffer[..to_copy]);
|
||||
*this.pos += to_copy;
|
||||
if *this.pos == this.buffer.len() {
|
||||
this.buffer.clear();
|
||||
*this.pos = 0;
|
||||
}
|
||||
Poll::Ready(Ok(()))
|
||||
} else {
|
||||
Poll::Pending
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -163,9 +183,10 @@ where
|
||||
|
||||
pin_project! {
|
||||
/// A reader wrapper that decompresses data on the fly using the configured compression algorithm.
|
||||
// 1~3 bytes store the length of the compressed data
|
||||
// The first byte stores the type of the compressed data: 00 = compressed, 01 = uncompressed
|
||||
// The first 4 bytes store the CRC32 checksum of the compressed data
|
||||
/// Header format:
|
||||
/// - First byte: compression type (00 = compressed, 01 = uncompressed, FF = end)
|
||||
/// - Bytes 1-3: length of compressed data (little-endian)
|
||||
/// - Bytes 4-7: CRC32 checksum of uncompressed data (little-endian)
|
||||
#[derive(Debug)]
|
||||
pub struct DecompressReader<R> {
|
||||
#[pin]
|
||||
@@ -173,11 +194,11 @@ pin_project! {
|
||||
buffer: Vec<u8>,
|
||||
buffer_pos: usize,
|
||||
finished: bool,
|
||||
// New fields for saving header read progress across polls
|
||||
// Fields for saving header read progress across polls
|
||||
header_buf: [u8; 8],
|
||||
header_read: usize,
|
||||
header_done: bool,
|
||||
// New fields for saving compressed block read progress across polls
|
||||
// Fields for saving compressed block read progress across polls
|
||||
compressed_buf: Option<Vec<u8>>,
|
||||
compressed_read: usize,
|
||||
compressed_len: usize,
|
||||
@@ -187,7 +208,7 @@ pin_project! {
|
||||
|
||||
impl<R> DecompressReader<R>
|
||||
where
|
||||
R: Reader,
|
||||
R: AsyncRead + Unpin + Send + Sync,
|
||||
{
|
||||
pub fn new(inner: R, compression_algorithm: CompressionAlgorithm) -> Self {
|
||||
Self {
|
||||
@@ -212,9 +233,9 @@ where
|
||||
{
|
||||
fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<io::Result<()>> {
|
||||
let mut this = self.project();
|
||||
// Serve from buffer if any
|
||||
// Copy from buffer first if available
|
||||
if *this.buffer_pos < this.buffer.len() {
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len() - *this.buffer_pos);
|
||||
let to_copy = min(buf.remaining(), this.buffer.len() - *this.buffer_pos);
|
||||
buf.put_slice(&this.buffer[*this.buffer_pos..*this.buffer_pos + to_copy]);
|
||||
*this.buffer_pos += to_copy;
|
||||
if *this.buffer_pos == this.buffer.len() {
|
||||
@@ -223,15 +244,13 @@ where
|
||||
}
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
|
||||
if *this.finished {
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
|
||||
// Read header, support saving progress across polls
|
||||
while !*this.header_done && *this.header_read < 8 {
|
||||
let mut temp = [0u8; 8];
|
||||
let mut temp_buf = ReadBuf::new(&mut temp[0..8 - *this.header_read]);
|
||||
// Read header
|
||||
while !*this.header_done && *this.header_read < HEADER_LEN {
|
||||
let mut temp = [0u8; HEADER_LEN];
|
||||
let mut temp_buf = ReadBuf::new(&mut temp[0..HEADER_LEN - *this.header_read]);
|
||||
match this.inner.as_mut().poll_read(cx, &mut temp_buf) {
|
||||
Poll::Pending => return Poll::Pending,
|
||||
Poll::Ready(Ok(())) => {
|
||||
@@ -243,34 +262,27 @@ where
|
||||
*this.header_read += n;
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
// error!("DecompressReader poll_read: read header error: {e}");
|
||||
return Poll::Ready(Err(e));
|
||||
}
|
||||
}
|
||||
if *this.header_read < 8 {
|
||||
// Header not fully read, return Pending or Ok, wait for next poll
|
||||
if *this.header_read < HEADER_LEN {
|
||||
return Poll::Pending;
|
||||
}
|
||||
}
|
||||
|
||||
if !*this.header_done && *this.header_read == 0 {
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
let typ = this.header_buf[0];
|
||||
let len = (this.header_buf[1] as usize) | ((this.header_buf[2] as usize) << 8) | ((this.header_buf[3] as usize) << 16);
|
||||
let crc = (this.header_buf[4] as u32)
|
||||
| ((this.header_buf[5] as u32) << 8)
|
||||
| ((this.header_buf[6] as u32) << 16)
|
||||
| ((this.header_buf[7] as u32) << 24);
|
||||
|
||||
// Header is used up, reset header_read
|
||||
*this.header_read = 0;
|
||||
*this.header_done = true;
|
||||
|
||||
if typ == 0xFF {
|
||||
*this.finished = true;
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
|
||||
// Save compressed block read progress across polls
|
||||
if this.compressed_buf.is_none() {
|
||||
*this.compressed_len = len - 4;
|
||||
*this.compressed_len = len;
|
||||
*this.compressed_buf = Some(vec![0u8; *this.compressed_len]);
|
||||
*this.compressed_read = 0;
|
||||
}
|
||||
@@ -287,6 +299,7 @@ where
|
||||
*this.compressed_read += n;
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
// error!("DecompressReader poll_read: read compressed block error: {e}");
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
@@ -294,44 +307,44 @@ where
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// After reading all, unpack
|
||||
let (uncompress_len, uvarint) = uvarint(&compressed_buf[0..16]);
|
||||
let compressed_data = &compressed_buf[uvarint as usize..];
|
||||
let decompressed = if typ == 0x00 {
|
||||
let decompressed = if typ == COMPRESS_TYPE_COMPRESSED {
|
||||
match decompress_block(compressed_data, *this.compression_algorithm) {
|
||||
Ok(out) => out,
|
||||
Err(e) => {
|
||||
// error!("DecompressReader decompress_block error: {e}");
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
return Poll::Ready(Err(e));
|
||||
}
|
||||
}
|
||||
} else if typ == 0x01 {
|
||||
} else if typ == COMPRESS_TYPE_UNCOMPRESSED {
|
||||
compressed_data.to_vec()
|
||||
} else if typ == 0xFF {
|
||||
// Handle end marker
|
||||
} else if typ == COMPRESS_TYPE_END {
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
*this.finished = true;
|
||||
return Poll::Ready(Ok(()));
|
||||
} else {
|
||||
// error!("DecompressReader unknown compression type: {typ}");
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
return Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Unknown compression type")));
|
||||
};
|
||||
if decompressed.len() != uncompress_len as usize {
|
||||
// error!("DecompressReader decompressed length mismatch: {} != {}", decompressed.len(), uncompress_len);
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
return Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Decompressed length mismatch")));
|
||||
}
|
||||
|
||||
let actual_crc = crc32fast::hash(&decompressed);
|
||||
if actual_crc != crc {
|
||||
// error!("DecompressReader CRC32 mismatch: actual {actual_crc} != expected {crc}");
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
@@ -339,15 +352,17 @@ where
|
||||
}
|
||||
*this.buffer = decompressed;
|
||||
*this.buffer_pos = 0;
|
||||
// Clear compressed block state for next block
|
||||
this.compressed_buf.take();
|
||||
*this.compressed_read = 0;
|
||||
*this.compressed_len = 0;
|
||||
*this.header_done = false;
|
||||
let to_copy = std::cmp::min(buf.remaining(), this.buffer.len());
|
||||
let to_copy = min(buf.remaining(), this.buffer.len());
|
||||
buf.put_slice(&this.buffer[..to_copy]);
|
||||
*this.buffer_pos += to_copy;
|
||||
|
||||
if *this.buffer_pos == this.buffer.len() {
|
||||
this.buffer.clear();
|
||||
*this.buffer_pos = 0;
|
||||
}
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
}
|
||||
@@ -373,8 +388,34 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// Build compressed block with header + uvarint + compressed data
|
||||
fn build_compressed_block(uncompressed_data: &[u8], compression_algorithm: CompressionAlgorithm) -> Vec<u8> {
|
||||
let crc = crc32fast::hash(uncompressed_data);
|
||||
let compressed_data = compress_block(uncompressed_data, compression_algorithm);
|
||||
let uncompressed_len = uncompressed_data.len();
|
||||
let mut uncompressed_len_buf = [0u8; 10];
|
||||
let int_len = put_uvarint(&mut uncompressed_len_buf[..], uncompressed_len as u64);
|
||||
let len = compressed_data.len() + int_len;
|
||||
let mut header = [0u8; HEADER_LEN];
|
||||
header[0] = COMPRESS_TYPE_COMPRESSED;
|
||||
header[1] = (len & 0xFF) as u8;
|
||||
header[2] = ((len >> 8) & 0xFF) as u8;
|
||||
header[3] = ((len >> 16) & 0xFF) as u8;
|
||||
header[4] = (crc & 0xFF) as u8;
|
||||
header[5] = ((crc >> 8) & 0xFF) as u8;
|
||||
header[6] = ((crc >> 16) & 0xFF) as u8;
|
||||
header[7] = ((crc >> 24) & 0xFF) as u8;
|
||||
let mut out = Vec::with_capacity(len + HEADER_LEN);
|
||||
out.extend_from_slice(&header);
|
||||
out.extend_from_slice(&uncompressed_len_buf[..int_len]);
|
||||
out.extend_from_slice(&compressed_data);
|
||||
out
|
||||
}
|
||||
|
||||
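`build_compressed_block` defines the frame that `DecompressReader` consumes: an 8-byte header (type byte, 24-bit little-endian payload length, 32-bit little-endian CRC32 of the uncompressed data), then a uvarint uncompressed length, then the block body. A hedged decoding sketch of that frame; `decompress` stands in for `rustfs_utils::compress::decompress_block` with a fixed algorithm, and `read_uvarint` is a local helper rather than the crate's `uvarint`:

```rust
use std::io;

fn decode_block(block: &[u8], decompress: impl Fn(&[u8]) -> io::Result<Vec<u8>>) -> io::Result<Vec<u8>> {
    const HEADER_LEN: usize = 8;
    if block.len() < HEADER_LEN {
        return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "short header"));
    }
    let typ = block[0];
    let len = usize::from(block[1]) | usize::from(block[2]) << 8 | usize::from(block[3]) << 16;
    let crc = u32::from_le_bytes(block[4..8].try_into().unwrap());
    if typ == 0xFF {
        // COMPRESS_TYPE_END: header only, no payload follows.
        return Ok(Vec::new());
    }
    if block.len() < HEADER_LEN + len {
        return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "short payload"));
    }
    let payload = &block[HEADER_LEN..HEADER_LEN + len];

    // Payload = uvarint(uncompressed length) followed by the block body.
    let (uncompressed_len, n) = read_uvarint(payload)?;
    let body = &payload[n..];
    let out = match typ {
        0x00 => decompress(body)?, // COMPRESS_TYPE_COMPRESSED
        0x01 => body.to_vec(),     // COMPRESS_TYPE_UNCOMPRESSED
        _ => return Err(io::Error::new(io::ErrorKind::InvalidData, "unknown block type")),
    };
    if out.len() as u64 != uncompressed_len || crc32fast::hash(&out) != crc {
        return Err(io::Error::new(io::ErrorKind::InvalidData, "corrupt block"));
    }
    Ok(out)
}

fn read_uvarint(buf: &[u8]) -> io::Result<(u64, usize)> {
    let (mut v, mut shift) = (0u64, 0u32);
    for (i, &b) in buf.iter().enumerate() {
        v |= u64::from(b & 0x7F) << shift;
        if b < 0x80 {
            return Ok((v, i + 1));
        }
        shift += 7;
    }
    Err(io::Error::new(io::ErrorKind::UnexpectedEof, "truncated uvarint"))
}
```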
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::WarpReader;
|
||||
|
||||
use super::*;
|
||||
use std::io::Cursor;
|
||||
use tokio::io::{AsyncReadExt, BufReader};
|
||||
@@ -383,7 +424,7 @@ mod tests {
|
||||
async fn test_compress_reader_basic() {
|
||||
let data = b"hello world, hello world, hello world!";
|
||||
let reader = Cursor::new(&data[..]);
|
||||
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Gzip);
|
||||
let mut compress_reader = CompressReader::new(WarpReader::new(reader), CompressionAlgorithm::Gzip);
|
||||
|
||||
let mut compressed = Vec::new();
|
||||
compress_reader.read_to_end(&mut compressed).await.unwrap();
|
||||
@@ -400,7 +441,7 @@ mod tests {
|
||||
async fn test_compress_reader_basic_deflate() {
|
||||
let data = b"hello world, hello world, hello world!";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Deflate);
|
||||
let mut compress_reader = CompressReader::new(WarpReader::new(reader), CompressionAlgorithm::Deflate);
|
||||
|
||||
let mut compressed = Vec::new();
|
||||
compress_reader.read_to_end(&mut compressed).await.unwrap();
|
||||
@@ -417,7 +458,7 @@ mod tests {
|
||||
async fn test_compress_reader_empty() {
|
||||
let data = b"";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Gzip);
|
||||
let mut compress_reader = CompressReader::new(WarpReader::new(reader), CompressionAlgorithm::Gzip);
|
||||
|
||||
let mut compressed = Vec::new();
|
||||
compress_reader.read_to_end(&mut compressed).await.unwrap();
|
||||
@@ -436,7 +477,7 @@ mod tests {
|
||||
let mut data = vec![0u8; 1024 * 1024];
|
||||
rand::rng().fill(&mut data[..]);
|
||||
let reader = Cursor::new(data.clone());
|
||||
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Gzip);
|
||||
let mut compress_reader = CompressReader::new(WarpReader::new(reader), CompressionAlgorithm::Gzip);
|
||||
|
||||
let mut compressed = Vec::new();
|
||||
compress_reader.read_to_end(&mut compressed).await.unwrap();
|
||||
@@ -452,15 +493,15 @@ mod tests {
|
||||
async fn test_compress_reader_large_deflate() {
|
||||
use rand::Rng;
|
||||
// Generate ~3 MiB of random bytes (deliberately not block-aligned)
|
||||
let mut data = vec![0u8; 1024 * 1024];
|
||||
let mut data = vec![0u8; 1024 * 1024 * 3 + 512];
|
||||
rand::rng().fill(&mut data[..]);
|
||||
let reader = Cursor::new(data.clone());
|
||||
let mut compress_reader = CompressReader::new(reader, CompressionAlgorithm::Deflate);
|
||||
let mut compress_reader = CompressReader::new(WarpReader::new(reader), CompressionAlgorithm::default());
|
||||
|
||||
let mut compressed = Vec::new();
|
||||
compress_reader.read_to_end(&mut compressed).await.unwrap();
|
||||
|
||||
let mut decompress_reader = DecompressReader::new(Cursor::new(compressed.clone()), CompressionAlgorithm::Deflate);
|
||||
let mut decompress_reader = DecompressReader::new(Cursor::new(compressed.clone()), CompressionAlgorithm::default());
|
||||
let mut decompressed = Vec::new();
|
||||
decompress_reader.read_to_end(&mut decompressed).await.unwrap();
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::HashReaderDetector;
|
||||
use crate::HashReaderMut;
|
||||
use crate::compress_index::{Index, TryGetIndex};
|
||||
use crate::{EtagResolvable, Reader};
|
||||
use aes_gcm::aead::Aead;
|
||||
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
|
||||
@@ -145,6 +146,15 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> TryGetIndex for EncryptReader<R>
|
||||
where
|
||||
R: TryGetIndex,
|
||||
{
|
||||
fn try_get_index(&self) -> Option<&Index> {
|
||||
self.inner.try_get_index()
|
||||
}
|
||||
}
|
||||
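Forwarding `TryGetIndex` through `EncryptReader` keeps the seek index built by an inner `CompressReader` reachable when the compressed stream is additionally encrypted. A rough sketch, assuming `CompressReader` satisfies the `Reader` bound required by `EncryptReader`, a 12-byte nonce as in the tests below, and the usual `rustfs_rio` imports:

```rust
use std::io::Cursor;

fn index_survives_encryption_wrapper() {
    let data = vec![7u8; 4 << 20]; // 4 MiB of stand-in payload
    let key = [0u8; 32];
    let nonce = [0u8; 12]; // assumed nonce size

    let compress = CompressReader::new(WarpReader::new(Cursor::new(data)), CompressionAlgorithm::default());
    let encrypt = EncryptReader::new(compress, key, nonce);

    // Works because EncryptReader now delegates try_get_index() to its inner reader.
    assert!(encrypt.try_get_index().is_some());
}
```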
|
||||
pin_project! {
|
||||
/// A reader wrapper that decrypts data on the fly using AES-256-GCM.
|
||||
/// This is a demonstration. For production, use a secure and audited crypto library.
|
||||
@@ -339,6 +349,8 @@ where
|
||||
mod tests {
|
||||
use std::io::Cursor;
|
||||
|
||||
use crate::WarpReader;
|
||||
|
||||
use super::*;
|
||||
use rand::RngCore;
|
||||
use tokio::io::{AsyncReadExt, BufReader};
|
||||
@@ -352,7 +364,7 @@ mod tests {
|
||||
rand::rng().fill_bytes(&mut nonce);
|
||||
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let encrypt_reader = EncryptReader::new(reader, key, nonce);
|
||||
let encrypt_reader = EncryptReader::new(WarpReader::new(reader), key, nonce);
|
||||
|
||||
// Encrypt
|
||||
let mut encrypt_reader = encrypt_reader;
|
||||
@@ -361,7 +373,7 @@ mod tests {
|
||||
|
||||
// Decrypt using DecryptReader
|
||||
let reader = Cursor::new(encrypted.clone());
|
||||
let decrypt_reader = DecryptReader::new(reader, key, nonce);
|
||||
let decrypt_reader = DecryptReader::new(WarpReader::new(reader), key, nonce);
|
||||
let mut decrypt_reader = decrypt_reader;
|
||||
let mut decrypted = Vec::new();
|
||||
decrypt_reader.read_to_end(&mut decrypted).await.unwrap();
|
||||
@@ -380,7 +392,7 @@ mod tests {
|
||||
|
||||
// Encrypt
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let encrypt_reader = EncryptReader::new(reader, key, nonce);
|
||||
let encrypt_reader = EncryptReader::new(WarpReader::new(reader), key, nonce);
|
||||
let mut encrypt_reader = encrypt_reader;
|
||||
let mut encrypted = Vec::new();
|
||||
encrypt_reader.read_to_end(&mut encrypted).await.unwrap();
|
||||
@@ -388,7 +400,7 @@ mod tests {
|
||||
// Now test DecryptReader
|
||||
|
||||
let reader = Cursor::new(encrypted.clone());
|
||||
let decrypt_reader = DecryptReader::new(reader, key, nonce);
|
||||
let decrypt_reader = DecryptReader::new(WarpReader::new(reader), key, nonce);
|
||||
let mut decrypt_reader = decrypt_reader;
|
||||
let mut decrypted = Vec::new();
|
||||
decrypt_reader.read_to_end(&mut decrypted).await.unwrap();
|
||||
@@ -408,13 +420,13 @@ mod tests {
|
||||
rand::rng().fill_bytes(&mut nonce);
|
||||
|
||||
let reader = std::io::Cursor::new(data.clone());
|
||||
let encrypt_reader = EncryptReader::new(reader, key, nonce);
|
||||
let encrypt_reader = EncryptReader::new(WarpReader::new(reader), key, nonce);
|
||||
let mut encrypt_reader = encrypt_reader;
|
||||
let mut encrypted = Vec::new();
|
||||
encrypt_reader.read_to_end(&mut encrypted).await.unwrap();
|
||||
|
||||
let reader = std::io::Cursor::new(encrypted.clone());
|
||||
let decrypt_reader = DecryptReader::new(reader, key, nonce);
|
||||
let decrypt_reader = DecryptReader::new(WarpReader::new(reader), key, nonce);
|
||||
let mut decrypt_reader = decrypt_reader;
|
||||
let mut decrypted = Vec::new();
|
||||
decrypt_reader.read_to_end(&mut decrypted).await.unwrap();
|
||||
|
||||
@@ -17,14 +17,15 @@ The `EtagResolvable` trait provides a clean way to handle recursive unwrapping:
|
||||
|
||||
```rust
|
||||
use rustfs_rio::{CompressReader, EtagReader, resolve_etag_generic};
|
||||
use rustfs_rio::compress::CompressionAlgorithm;
|
||||
use rustfs_rio::WarpReader;
|
||||
use rustfs_utils::compress::CompressionAlgorithm;
|
||||
use tokio::io::BufReader;
|
||||
use std::io::Cursor;
|
||||
|
||||
// Direct usage with trait-based approach
|
||||
let data = b"test data";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let etag_reader = EtagReader::new(reader, Some("test_etag".to_string()));
|
||||
let mut reader = CompressReader::new(etag_reader, CompressionAlgorithm::Gzip);
|
||||
let etag = resolve_etag_generic(&mut reader);
|
||||
@@ -34,9 +35,9 @@ let etag = resolve_etag_generic(&mut reader);
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use crate::compress::CompressionAlgorithm;
|
||||
use crate::resolve_etag_generic;
|
||||
use crate::{CompressReader, EncryptReader, EtagReader, HashReader};
|
||||
use crate::{WarpReader, resolve_etag_generic};
|
||||
use rustfs_utils::compress::CompressionAlgorithm;
|
||||
use std::io::Cursor;
|
||||
use tokio::io::BufReader;
|
||||
|
||||
@@ -44,7 +45,7 @@ mod tests {
|
||||
fn test_etag_reader_resolution() {
|
||||
let data = b"test data";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let mut etag_reader = EtagReader::new(reader, Some("test_etag".to_string()));
|
||||
|
||||
// Test direct ETag resolution
|
||||
@@ -55,7 +56,7 @@ mod tests {
|
||||
fn test_hash_reader_resolution() {
|
||||
let data = b"test data";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let mut hash_reader =
|
||||
HashReader::new(reader, data.len() as i64, data.len() as i64, Some("hash_etag".to_string()), false).unwrap();
|
||||
|
||||
@@ -67,7 +68,7 @@ mod tests {
|
||||
fn test_compress_reader_delegation() {
|
||||
let data = b"test data for compression";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let etag_reader = EtagReader::new(reader, Some("compress_etag".to_string()));
|
||||
let mut compress_reader = CompressReader::new(etag_reader, CompressionAlgorithm::Gzip);
|
||||
|
||||
@@ -79,7 +80,7 @@ mod tests {
|
||||
fn test_encrypt_reader_delegation() {
|
||||
let data = b"test data for encryption";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let etag_reader = EtagReader::new(reader, Some("encrypt_etag".to_string()));
|
||||
|
||||
let key = [0u8; 32];
|
||||
@@ -94,7 +95,7 @@ mod tests {
|
||||
fn test_complex_nesting() {
|
||||
let data = b"test data for complex nesting";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
// Create a complex nested structure: CompressReader<EncryptReader<EtagReader<BufReader<Cursor>>>>
|
||||
let etag_reader = EtagReader::new(reader, Some("nested_etag".to_string()));
|
||||
let key = [0u8; 32];
|
||||
@@ -110,7 +111,7 @@ mod tests {
|
||||
fn test_hash_reader_in_nested_structure() {
|
||||
let data = b"test data for hash reader nesting";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
// Create nested structure: CompressReader<HashReader<BufReader<Cursor>>>
|
||||
let hash_reader =
|
||||
HashReader::new(reader, data.len() as i64, data.len() as i64, Some("hash_nested_etag".to_string()), false).unwrap();
|
||||
@@ -127,14 +128,14 @@ mod tests {
|
||||
// Test 1: Simple EtagReader
|
||||
let data1 = b"simple test";
|
||||
let reader1 = BufReader::new(Cursor::new(&data1[..]));
|
||||
let reader1 = Box::new(reader1);
|
||||
let reader1 = Box::new(WarpReader::new(reader1));
|
||||
let mut etag_reader = EtagReader::new(reader1, Some("simple_etag".to_string()));
|
||||
assert_eq!(resolve_etag_generic(&mut etag_reader), Some("simple_etag".to_string()));
|
||||
|
||||
// Test 2: HashReader with ETag
|
||||
let data2 = b"hash test";
|
||||
let reader2 = BufReader::new(Cursor::new(&data2[..]));
|
||||
let reader2 = Box::new(reader2);
|
||||
let reader2 = Box::new(WarpReader::new(reader2));
|
||||
let mut hash_reader =
|
||||
HashReader::new(reader2, data2.len() as i64, data2.len() as i64, Some("hash_etag".to_string()), false).unwrap();
|
||||
assert_eq!(resolve_etag_generic(&mut hash_reader), Some("hash_etag".to_string()));
|
||||
@@ -142,7 +143,7 @@ mod tests {
|
||||
// Test 3: Single wrapper - CompressReader<EtagReader>
|
||||
let data3 = b"compress test";
|
||||
let reader3 = BufReader::new(Cursor::new(&data3[..]));
|
||||
let reader3 = Box::new(reader3);
|
||||
let reader3 = Box::new(WarpReader::new(reader3));
|
||||
let etag_reader3 = EtagReader::new(reader3, Some("compress_wrapped_etag".to_string()));
|
||||
let mut compress_reader = CompressReader::new(etag_reader3, CompressionAlgorithm::Zstd);
|
||||
assert_eq!(resolve_etag_generic(&mut compress_reader), Some("compress_wrapped_etag".to_string()));
|
||||
@@ -150,7 +151,7 @@ mod tests {
|
||||
// Test 4: Double wrapper - CompressReader<EncryptReader<EtagReader>>
|
||||
let data4 = b"double wrap test";
|
||||
let reader4 = BufReader::new(Cursor::new(&data4[..]));
|
||||
let reader4 = Box::new(reader4);
|
||||
let reader4 = Box::new(WarpReader::new(reader4));
|
||||
let etag_reader4 = EtagReader::new(reader4, Some("double_wrapped_etag".to_string()));
|
||||
let key = [1u8; 32];
|
||||
let nonce = [1u8; 12];
|
||||
@@ -172,7 +173,7 @@ mod tests {
|
||||
|
||||
let data = b"Real world test data that might be compressed and encrypted";
|
||||
let base_reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let base_reader = Box::new(base_reader);
|
||||
let base_reader = Box::new(WarpReader::new(base_reader));
|
||||
// Create a complex nested structure that might occur in practice:
|
||||
// CompressReader<EncryptReader<HashReader<BufReader<Cursor>>>>
|
||||
let hash_reader = HashReader::new(
|
||||
@@ -197,7 +198,7 @@ mod tests {
|
||||
// Test another complex nesting with EtagReader at the core
|
||||
let data2 = b"Another real world scenario";
|
||||
let base_reader2 = BufReader::new(Cursor::new(&data2[..]));
|
||||
let base_reader2 = Box::new(base_reader2);
|
||||
let base_reader2 = Box::new(WarpReader::new(base_reader2));
|
||||
let etag_reader = EtagReader::new(base_reader2, Some("core_etag".to_string()));
|
||||
let key2 = [99u8; 32];
|
||||
let nonce2 = [88u8; 12];
|
||||
@@ -223,21 +224,21 @@ mod tests {
|
||||
// Test with HashReader that has no etag
|
||||
let data = b"no etag test";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let mut hash_reader_no_etag = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap();
|
||||
assert_eq!(resolve_etag_generic(&mut hash_reader_no_etag), None);
|
||||
|
||||
// Test with EtagReader that has None etag
|
||||
let data2 = b"no etag test 2";
|
||||
let reader2 = BufReader::new(Cursor::new(&data2[..]));
|
||||
let reader2 = Box::new(reader2);
|
||||
let reader2 = Box::new(WarpReader::new(reader2));
|
||||
let mut etag_reader_none = EtagReader::new(reader2, None);
|
||||
assert_eq!(resolve_etag_generic(&mut etag_reader_none), None);
|
||||
|
||||
// Test nested structure with no ETag at the core
|
||||
let data3 = b"nested no etag test";
|
||||
let reader3 = BufReader::new(Cursor::new(&data3[..]));
|
||||
let reader3 = Box::new(reader3);
|
||||
let reader3 = Box::new(WarpReader::new(reader3));
|
||||
let etag_reader3 = EtagReader::new(reader3, None);
|
||||
let mut compress_reader3 = CompressReader::new(etag_reader3, CompressionAlgorithm::Gzip);
|
||||
assert_eq!(resolve_etag_generic(&mut compress_reader3), None);
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use crate::compress_index::{Index, TryGetIndex};
|
||||
use crate::{EtagResolvable, HashReaderDetector, HashReaderMut, Reader};
|
||||
use md5::{Digest, Md5};
|
||||
use pin_project_lite::pin_project;
|
||||
@@ -82,8 +83,16 @@ impl HashReaderDetector for EtagReader {
|
||||
}
|
||||
}
|
||||
|
||||
impl TryGetIndex for EtagReader {
|
||||
fn try_get_index(&self) -> Option<&Index> {
|
||||
self.inner.try_get_index()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::WarpReader;
|
||||
|
||||
use super::*;
|
||||
use std::io::Cursor;
|
||||
use tokio::io::{AsyncReadExt, BufReader};
|
||||
@@ -95,7 +104,7 @@ mod tests {
|
||||
hasher.update(data);
|
||||
let expected = format!("{:x}", hasher.finalize());
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let mut etag_reader = EtagReader::new(reader, None);
|
||||
|
||||
let mut buf = Vec::new();
|
||||
@@ -114,7 +123,7 @@ mod tests {
|
||||
hasher.update(data);
|
||||
let expected = format!("{:x}", hasher.finalize());
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let mut etag_reader = EtagReader::new(reader, None);
|
||||
|
||||
let mut buf = Vec::new();
|
||||
@@ -133,7 +142,7 @@ mod tests {
|
||||
hasher.update(data);
|
||||
let expected = format!("{:x}", hasher.finalize());
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let mut etag_reader = EtagReader::new(reader, None);
|
||||
|
||||
let mut buf = Vec::new();
|
||||
@@ -150,7 +159,7 @@ mod tests {
|
||||
async fn test_etag_reader_not_finished() {
|
||||
let data = b"abc123";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let mut etag_reader = EtagReader::new(reader, None);
|
||||
|
||||
// Do not read to end, etag should be None
|
||||
@@ -174,7 +183,7 @@ mod tests {
|
||||
let expected = format!("{:x}", hasher.finalize());
|
||||
|
||||
let reader = Cursor::new(data.clone());
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let mut etag_reader = EtagReader::new(reader, None);
|
||||
|
||||
let mut buf = Vec::new();
|
||||
@@ -193,7 +202,7 @@ mod tests {
|
||||
hasher.update(data);
|
||||
let expected = format!("{:x}", hasher.finalize());
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let mut etag_reader = EtagReader::new(reader, Some(expected.clone()));
|
||||
|
||||
let mut buf = Vec::new();
|
||||
@@ -209,7 +218,7 @@ mod tests {
|
||||
let data = b"checksum test data";
|
||||
let wrong_checksum = "deadbeefdeadbeefdeadbeefdeadbeef".to_string();
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let mut etag_reader = EtagReader::new(reader, Some(wrong_checksum));
|
||||
|
||||
let mut buf = Vec::new();
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
use crate::compress_index::{Index, TryGetIndex};
|
||||
use crate::{EtagResolvable, HashReaderDetector, HashReaderMut, Reader};
|
||||
use pin_project_lite::pin_project;
|
||||
use std::io::{Error, Result};
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::{AsyncRead, ReadBuf};
|
||||
|
||||
use crate::{EtagResolvable, HashReaderDetector, HashReaderMut, Reader};
|
||||
|
||||
use pin_project_lite::pin_project;
|
||||
|
||||
pin_project! {
|
||||
pub struct HardLimitReader {
|
||||
#[pin]
|
||||
@@ -60,10 +59,18 @@ impl HashReaderDetector for HardLimitReader {
|
||||
}
|
||||
}
|
||||
|
||||
impl TryGetIndex for HardLimitReader {
|
||||
fn try_get_index(&self) -> Option<&Index> {
|
||||
self.inner.try_get_index()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::vec;
|
||||
|
||||
use crate::WarpReader;
|
||||
|
||||
use super::*;
|
||||
use rustfs_utils::read_full;
|
||||
use tokio::io::{AsyncReadExt, BufReader};
|
||||
@@ -72,7 +79,7 @@ mod tests {
|
||||
async fn test_hardlimit_reader_normal() {
|
||||
let data = b"hello world";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let hardlimit = HardLimitReader::new(reader, 20);
|
||||
let mut r = hardlimit;
|
||||
let mut buf = Vec::new();
|
||||
@@ -85,7 +92,7 @@ mod tests {
|
||||
async fn test_hardlimit_reader_exact_limit() {
|
||||
let data = b"1234567890";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let hardlimit = HardLimitReader::new(reader, 10);
|
||||
let mut r = hardlimit;
|
||||
let mut buf = Vec::new();
|
||||
@@ -98,7 +105,7 @@ mod tests {
|
||||
async fn test_hardlimit_reader_exceed_limit() {
|
||||
let data = b"abcdef";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let hardlimit = HardLimitReader::new(reader, 3);
|
||||
let mut r = hardlimit;
|
||||
let mut buf = vec![0u8; 10];
|
||||
@@ -123,7 +130,7 @@ mod tests {
|
||||
async fn test_hardlimit_reader_empty() {
|
||||
let data = b"";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let hardlimit = HardLimitReader::new(reader, 5);
|
||||
let mut r = hardlimit;
|
||||
let mut buf = Vec::new();
|
||||
|
||||
@@ -24,11 +24,12 @@
|
||||
//! use rustfs_rio::{HashReader, HardLimitReader, EtagReader};
|
||||
//! use tokio::io::BufReader;
|
||||
//! use std::io::Cursor;
|
||||
//! use rustfs_rio::WarpReader;
|
||||
//!
|
||||
//! # tokio_test::block_on(async {
|
||||
//! let data = b"hello world";
|
||||
//! let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
//! let reader = Box::new(reader);
|
||||
//! let reader = Box::new(WarpReader::new(reader));
|
||||
//! let size = data.len() as i64;
|
||||
//! let actual_size = size;
|
||||
//! let etag = None;
|
||||
@@ -39,7 +40,7 @@
|
||||
//!
|
||||
//! // Method 2: With manual wrapping to recreate original logic
|
||||
//! let reader2 = BufReader::new(Cursor::new(&data[..]));
|
||||
//! let reader2 = Box::new(reader2);
|
||||
//! let reader2 = Box::new(WarpReader::new(reader2));
|
||||
//! let wrapped_reader: Box<dyn rustfs_rio::Reader> = if size > 0 {
|
||||
//! if !diskable_md5 {
|
||||
//! // Wrap with both HardLimitReader and EtagReader
|
||||
@@ -68,18 +69,19 @@
|
||||
//! use rustfs_rio::{HashReader, HashReaderDetector};
|
||||
//! use tokio::io::BufReader;
|
||||
//! use std::io::Cursor;
|
||||
//! use rustfs_rio::WarpReader;
|
||||
//!
|
||||
//! # tokio_test::block_on(async {
|
||||
//! let data = b"test";
|
||||
//! let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
//! let hash_reader = HashReader::new(Box::new(reader), 4, 4, None, false).unwrap();
|
||||
//! let hash_reader = HashReader::new(Box::new(WarpReader::new(reader)), 4, 4, None, false).unwrap();
|
||||
//!
|
||||
//! // Check if a type is a HashReader
|
||||
//! assert!(hash_reader.is_hash_reader());
|
||||
//!
|
||||
//! // Use new for compatibility (though it's simpler to use new() directly)
|
||||
//! let reader2 = BufReader::new(Cursor::new(&data[..]));
|
||||
//! let result = HashReader::new(Box::new(reader2), 4, 4, None, false);
|
||||
//! let result = HashReader::new(Box::new(WarpReader::new(reader2)), 4, 4, None, false);
|
||||
//! assert!(result.is_ok());
|
||||
//! # });
|
||||
//! ```
|
||||
@@ -89,6 +91,7 @@ use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::{AsyncRead, ReadBuf};
|
||||
|
||||
use crate::compress_index::{Index, TryGetIndex};
|
||||
use crate::{EtagReader, EtagResolvable, HardLimitReader, HashReaderDetector, Reader};
|
||||
|
||||
/// Trait for mutable operations on HashReader
|
||||
@@ -283,10 +286,16 @@ impl HashReaderDetector for HashReader {
|
||||
}
|
||||
}
|
||||
|
||||
impl TryGetIndex for HashReader {
|
||||
fn try_get_index(&self) -> Option<&Index> {
|
||||
self.inner.try_get_index()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{DecryptReader, encrypt_reader};
|
||||
use crate::{DecryptReader, WarpReader, encrypt_reader};
|
||||
use std::io::Cursor;
|
||||
use tokio::io::{AsyncReadExt, BufReader};
|
||||
|
||||
@@ -299,14 +308,14 @@ mod tests {
|
||||
|
||||
// Test 1: Simple creation
|
||||
let reader1 = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader1 = Box::new(reader1);
|
||||
let reader1 = Box::new(WarpReader::new(reader1));
|
||||
let hash_reader1 = HashReader::new(reader1, size, actual_size, etag.clone(), false).unwrap();
|
||||
assert_eq!(hash_reader1.size(), size);
|
||||
assert_eq!(hash_reader1.actual_size(), actual_size);
|
||||
|
||||
// Test 2: With HardLimitReader wrapping
|
||||
let reader2 = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader2 = Box::new(reader2);
|
||||
let reader2 = Box::new(WarpReader::new(reader2));
|
||||
let hard_limit = HardLimitReader::new(reader2, size);
|
||||
let hard_limit = Box::new(hard_limit);
|
||||
let hash_reader2 = HashReader::new(hard_limit, size, actual_size, etag.clone(), false).unwrap();
|
||||
@@ -315,7 +324,7 @@ mod tests {
|
||||
|
||||
// Test 3: With EtagReader wrapping
|
||||
let reader3 = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader3 = Box::new(reader3);
|
||||
let reader3 = Box::new(WarpReader::new(reader3));
|
||||
let etag_reader = EtagReader::new(reader3, etag.clone());
|
||||
let etag_reader = Box::new(etag_reader);
|
||||
let hash_reader3 = HashReader::new(etag_reader, size, actual_size, etag.clone(), false).unwrap();
|
||||
@@ -327,7 +336,7 @@ mod tests {
|
||||
async fn test_hashreader_etag_basic() {
|
||||
let data = b"hello hashreader";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let mut hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
let _ = hash_reader.read_to_end(&mut buf).await.unwrap();
|
||||
@@ -341,7 +350,7 @@ mod tests {
|
||||
async fn test_hashreader_diskable_md5() {
|
||||
let data = b"no etag";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let mut hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, true).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
let _ = hash_reader.read_to_end(&mut buf).await.unwrap();
|
||||
@@ -355,11 +364,11 @@ mod tests {
|
||||
async fn test_hashreader_new_logic() {
|
||||
let data = b"test data";
|
||||
let reader = BufReader::new(Cursor::new(&data[..]));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
// Create a HashReader first
|
||||
let hash_reader =
|
||||
HashReader::new(reader, data.len() as i64, data.len() as i64, Some("test_etag".to_string()), false).unwrap();
|
||||
let hash_reader = Box::new(hash_reader);
|
||||
let hash_reader = Box::new(WarpReader::new(hash_reader));
|
||||
// Now try to create another HashReader from the existing one using new
|
||||
let result = HashReader::new(hash_reader, data.len() as i64, data.len() as i64, Some("test_etag".to_string()), false);
|
||||
|
||||
@@ -371,11 +380,11 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_for_wrapping_readers() {
|
||||
use crate::compress::CompressionAlgorithm;
|
||||
use crate::{CompressReader, DecompressReader};
|
||||
use md5::{Digest, Md5};
|
||||
use rand::Rng;
|
||||
use rand::RngCore;
|
||||
use rustfs_utils::compress::CompressionAlgorithm;
|
||||
|
||||
// Generate 1MB random data
|
||||
let size = 1024 * 1024;
|
||||
@@ -397,7 +406,7 @@ mod tests {
|
||||
let size = data.len() as i64;
|
||||
let actual_size = data.len() as i64;
|
||||
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
// Create the HashReader
|
||||
let mut hr = HashReader::new(reader, size, actual_size, Some(expected.clone()), false).unwrap();
|
||||
|
||||
@@ -427,7 +436,7 @@ mod tests {
|
||||
|
||||
if is_encrypt {
|
||||
// Encrypt the compressed data
|
||||
let encrypt_reader = encrypt_reader::EncryptReader::new(Cursor::new(compressed_data), key, nonce);
|
||||
let encrypt_reader = encrypt_reader::EncryptReader::new(WarpReader::new(Cursor::new(compressed_data)), key, nonce);
|
||||
let mut encrypted_data = Vec::new();
|
||||
let mut encrypt_reader = encrypt_reader;
|
||||
encrypt_reader.read_to_end(&mut encrypted_data).await.unwrap();
|
||||
@@ -435,14 +444,15 @@ mod tests {
|
||||
println!("Encrypted size: {}", encrypted_data.len());
|
||||
|
||||
// Decrypt the data
|
||||
let decrypt_reader = DecryptReader::new(Cursor::new(encrypted_data), key, nonce);
|
||||
let decrypt_reader = DecryptReader::new(WarpReader::new(Cursor::new(encrypted_data)), key, nonce);
|
||||
let mut decrypt_reader = decrypt_reader;
|
||||
let mut decrypted_data = Vec::new();
|
||||
decrypt_reader.read_to_end(&mut decrypted_data).await.unwrap();
|
||||
|
||||
if is_compress {
|
||||
// If compression was applied, decompress first
|
||||
let decompress_reader = DecompressReader::new(Cursor::new(decrypted_data), CompressionAlgorithm::Gzip);
|
||||
let decompress_reader =
|
||||
DecompressReader::new(WarpReader::new(Cursor::new(decrypted_data)), CompressionAlgorithm::Gzip);
|
||||
let mut decompress_reader = decompress_reader;
|
||||
let mut final_data = Vec::new();
|
||||
decompress_reader.read_to_end(&mut final_data).await.unwrap();
|
||||
@@ -460,7 +470,8 @@ mod tests {
|
||||
|
||||
// Without encryption, handle compression/decompression directly
|
||||
if is_compress {
|
||||
let decompress_reader = DecompressReader::new(Cursor::new(compressed_data), CompressionAlgorithm::Gzip);
|
||||
let decompress_reader =
|
||||
DecompressReader::new(WarpReader::new(Cursor::new(compressed_data)), CompressionAlgorithm::Gzip);
|
||||
let mut decompress_reader = decompress_reader;
|
||||
let mut decompressed = Vec::new();
|
||||
decompress_reader.read_to_end(&mut decompressed).await.unwrap();
|
||||
@@ -481,8 +492,8 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compression_with_compressible_data() {
|
||||
use crate::compress::CompressionAlgorithm;
|
||||
use crate::{CompressReader, DecompressReader};
|
||||
use rustfs_utils::compress::CompressionAlgorithm;
|
||||
|
||||
// Create highly compressible data (repeated pattern)
|
||||
let pattern = b"Hello, World! This is a test pattern that should compress well. ";
|
||||
@@ -495,7 +506,7 @@ mod tests {
|
||||
println!("Original data size: {} bytes", data.len());
|
||||
|
||||
let reader = BufReader::new(Cursor::new(data.clone()));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap();
|
||||
|
||||
// Test compression
|
||||
@@ -525,8 +536,8 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compression_algorithms() {
|
||||
use crate::compress::CompressionAlgorithm;
|
||||
use crate::{CompressReader, DecompressReader};
|
||||
use rustfs_utils::compress::CompressionAlgorithm;
|
||||
|
||||
let data = b"This is test data for compression algorithm testing. ".repeat(1000);
|
||||
println!("Testing with {} bytes of data", data.len());
|
||||
@@ -541,7 +552,7 @@ mod tests {
|
||||
println!("\nTesting algorithm: {:?}", algorithm);
|
||||
|
||||
let reader = BufReader::new(Cursor::new(data.clone()));
|
||||
let reader = Box::new(reader);
|
||||
let reader = Box::new(WarpReader::new(reader));
|
||||
let hash_reader = HashReader::new(reader, data.len() as i64, data.len() as i64, None, false).unwrap();
|
||||
|
||||
// Compress
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
mod limit_reader;
|
||||
use std::io::Cursor;
|
||||
|
||||
pub use limit_reader::LimitReader;
|
||||
|
||||
mod etag_reader;
|
||||
pub use etag_reader::EtagReader;
|
||||
|
||||
mod compress_index;
|
||||
mod compress_reader;
|
||||
pub use compress_reader::{CompressReader, DecompressReader};
|
||||
|
||||
@@ -18,21 +18,20 @@ pub use hardlimit_reader::HardLimitReader;
|
||||
mod hash_reader;
|
||||
pub use hash_reader::*;
|
||||
|
||||
pub mod compress;
|
||||
|
||||
pub mod reader;
|
||||
pub use reader::WarpReader;
|
||||
|
||||
mod writer;
|
||||
use tokio::io::{AsyncRead, BufReader};
|
||||
pub use writer::*;
|
||||
|
||||
mod http_reader;
|
||||
pub use http_reader::*;
|
||||
|
||||
pub use compress_index::TryGetIndex;
|
||||
|
||||
mod etag;
|
||||
|
||||
pub trait Reader: tokio::io::AsyncRead + Unpin + Send + Sync + EtagResolvable + HashReaderDetector {}
|
||||
pub trait Reader: tokio::io::AsyncRead + Unpin + Send + Sync + EtagResolvable + HashReaderDetector + TryGetIndex {}
|
||||
|
||||
// Trait for types that can be recursively searched for etag capability
|
||||
pub trait EtagResolvable {
|
||||
@@ -52,12 +51,6 @@ where
|
||||
reader.try_resolve_etag()
|
||||
}
|
||||
|
||||
impl<T> EtagResolvable for BufReader<T> where T: AsyncRead + Unpin + Send + Sync {}
|
||||
|
||||
impl<T> EtagResolvable for Cursor<T> where T: AsRef<[u8]> + Unpin + Send + Sync {}
|
||||
|
||||
impl<T> EtagResolvable for Box<T> where T: EtagResolvable {}
|
||||
|
||||
/// Trait to detect and manipulate HashReader instances
|
||||
pub trait HashReaderDetector {
|
||||
fn is_hash_reader(&self) -> bool {
|
||||
@@ -69,41 +62,8 @@ pub trait HashReaderDetector {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> HashReaderDetector for tokio::io::BufReader<T> where T: AsyncRead + Unpin + Send + Sync {}
|
||||
|
||||
impl<T> HashReaderDetector for std::io::Cursor<T> where T: AsRef<[u8]> + Unpin + Send + Sync {}
|
||||
|
||||
impl HashReaderDetector for Box<dyn AsyncRead + Unpin + Send + Sync> {}
|
||||
|
||||
impl<T> HashReaderDetector for Box<T> where T: HashReaderDetector {}
|
||||
|
||||
// Blanket implementations for Reader trait
|
||||
impl<T> Reader for tokio::io::BufReader<T> where T: AsyncRead + Unpin + Send + Sync {}
|
||||
|
||||
impl<T> Reader for std::io::Cursor<T> where T: AsRef<[u8]> + Unpin + Send + Sync {}
|
||||
|
||||
impl<T> Reader for Box<T> where T: Reader {}
|
||||
|
||||
// Forward declarations for wrapper types that implement all required traits
|
||||
impl Reader for crate::HashReader {}
|
||||
|
||||
impl Reader for HttpReader {}
|
||||
|
||||
impl Reader for crate::HardLimitReader {}
|
||||
impl Reader for crate::EtagReader {}
|
||||
|
||||
impl<R> Reader for crate::EncryptReader<R> where R: Reader {}
|
||||
|
||||
impl<R> Reader for crate::DecryptReader<R> where R: Reader {}
|
||||
|
||||
impl<R> Reader for crate::CompressReader<R> where R: Reader {}
|
||||
|
||||
impl<R> Reader for crate::DecompressReader<R> where R: Reader {}
|
||||
|
||||
impl Reader for tokio::fs::File {}
|
||||
impl HashReaderDetector for tokio::fs::File {}
|
||||
impl EtagResolvable for tokio::fs::File {}
|
||||
|
||||
impl Reader for tokio::io::DuplexStream {}
|
||||
impl HashReaderDetector for tokio::io::DuplexStream {}
|
||||
impl EtagResolvable for tokio::io::DuplexStream {}
|
||||
impl<R> Reader for crate::EncryptReader<R> where R: Reader {}
|
||||
|
||||
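With `TryGetIndex` added to the `Reader` supertrait list, ad-hoc readers no longer qualify on their own; the intended path is to wrap them in `WarpReader`, which carries default impls for all three marker traits. A minimal sketch of that wrapping (the helper name `as_reader` and the in-memory `Cursor` source are illustrative, not part of this patch):

```rust
use rustfs_rio::{Reader, WarpReader};
use std::io::Cursor;

// WarpReader supplies the EtagResolvable, HashReaderDetector and TryGetIndex
// defaults, so the wrapped value satisfies the extended `Reader` trait.
fn as_reader(data: Vec<u8>) -> Box<dyn Reader> {
    Box::new(WarpReader::new(Cursor::new(data)))
}
```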
@@ -9,7 +9,7 @@
|
||||
//! async fn main() {
|
||||
//! let data = b"hello world";
|
||||
//! let reader = BufReader::new(&data[..]);
|
||||
//! let mut limit_reader = LimitReader::new(reader, data.len() as u64);
|
||||
//! let mut limit_reader = LimitReader::new(reader, data.len());
|
||||
//!
|
||||
//! let mut buf = Vec::new();
|
||||
//! let n = limit_reader.read_to_end(&mut buf).await.unwrap();
|
||||
@@ -23,25 +23,25 @@ use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::{AsyncRead, ReadBuf};
|
||||
|
||||
use crate::{EtagResolvable, HashReaderDetector, HashReaderMut, Reader};
|
||||
use crate::{EtagResolvable, HashReaderDetector, HashReaderMut};
|
||||
|
||||
pin_project! {
|
||||
#[derive(Debug)]
|
||||
pub struct LimitReader<R> {
|
||||
#[pin]
|
||||
pub inner: R,
|
||||
limit: u64,
|
||||
read: u64,
|
||||
limit: usize,
|
||||
read: usize,
|
||||
}
|
||||
}
|
||||
|
||||
/// A wrapper for AsyncRead that limits the total number of bytes read.
|
||||
impl<R> LimitReader<R>
|
||||
where
|
||||
R: Reader,
|
||||
R: AsyncRead + Unpin + Send + Sync,
|
||||
{
|
||||
/// Create a new LimitReader wrapping `inner`, with a total read limit of `limit` bytes.
|
||||
pub fn new(inner: R, limit: u64) -> Self {
|
||||
pub fn new(inner: R, limit: usize) -> Self {
|
||||
Self { inner, limit, read: 0 }
|
||||
}
|
||||
}
|
||||
@@ -57,7 +57,7 @@ where
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
let orig_remaining = buf.remaining();
|
||||
let allowed = remaining.min(orig_remaining as u64) as usize;
|
||||
let allowed = remaining.min(orig_remaining);
|
||||
if allowed == 0 {
|
||||
return Poll::Ready(Ok(()));
|
||||
}
|
||||
@@ -66,7 +66,7 @@ where
|
||||
let poll = this.inner.as_mut().poll_read(cx, buf);
|
||||
if let Poll::Ready(Ok(())) = &poll {
|
||||
let n = buf.filled().len() - before_size;
|
||||
*this.read += n as u64;
|
||||
*this.read += n;
|
||||
}
|
||||
poll
|
||||
} else {
|
||||
@@ -76,7 +76,7 @@ where
|
||||
if let Poll::Ready(Ok(())) = &poll {
|
||||
let n = temp_buf.filled().len();
|
||||
buf.put_slice(temp_buf.filled());
|
||||
*this.read += n as u64;
|
||||
*this.read += n;
|
||||
}
|
||||
poll
|
||||
}
|
||||
@@ -115,7 +115,7 @@ mod tests {
|
||||
async fn test_limit_reader_exact() {
|
||||
let data = b"hello world";
|
||||
let reader = BufReader::new(&data[..]);
|
||||
let mut limit_reader = LimitReader::new(reader, data.len() as u64);
|
||||
let mut limit_reader = LimitReader::new(reader, data.len());
|
||||
|
||||
let mut buf = Vec::new();
|
||||
let n = limit_reader.read_to_end(&mut buf).await.unwrap();
|
||||
@@ -176,7 +176,7 @@ mod tests {
|
||||
let mut data = vec![0u8; size];
|
||||
rand::rng().fill(&mut data[..]);
|
||||
let reader = Cursor::new(data.clone());
|
||||
let mut limit_reader = LimitReader::new(reader, size as u64);
|
||||
let mut limit_reader = LimitReader::new(reader, size);
|
||||
|
||||
// Read data into buffer
|
||||
let mut buf = Vec::new();
|
||||
|
||||
@@ -2,6 +2,7 @@ use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tokio::io::{AsyncRead, ReadBuf};
|
||||
|
||||
use crate::compress_index::TryGetIndex;
|
||||
use crate::{EtagResolvable, HashReaderDetector, Reader};
|
||||
|
||||
pub struct WarpReader<R> {
|
||||
@@ -24,4 +25,6 @@ impl<R: AsyncRead + Unpin + Send + Sync> HashReaderDetector for WarpReader<R> {}
|
||||
|
||||
impl<R: AsyncRead + Unpin + Send + Sync> EtagResolvable for WarpReader<R> {}
|
||||
|
||||
impl<R: AsyncRead + Unpin + Send + Sync> TryGetIndex for WarpReader<R> {}
|
||||
|
||||
impl<R: AsyncRead + Unpin + Send + Sync> Reader for WarpReader<R> {}
|
||||
|
||||
@@ -29,10 +29,15 @@ tempfile = { workspace = true, optional = true }
|
||||
tokio = { workspace = true, optional = true, features = ["io-util", "macros"] }
|
||||
tracing = { workspace = true }
|
||||
url = { workspace = true , optional = true}
|
||||
|
||||
flate2 = { workspace = true , optional = true}
|
||||
brotli = { workspace = true , optional = true}
|
||||
zstd = { workspace = true , optional = true}
|
||||
snap = { workspace = true , optional = true}
|
||||
lz4 = { workspace = true , optional = true}
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = { workspace = true }
|
||||
rand = {workspace = true}
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi = { workspace = true, optional = true, features = ["std", "fileapi", "minwindef", "ntdef", "winnt"] }
|
||||
@@ -47,9 +52,10 @@ tls = ["dep:rustls", "dep:rustls-pemfile", "dep:rustls-pki-types"] # tls charac
|
||||
net = ["ip","dep:url", "dep:netif", "dep:lazy_static"] # empty network features
|
||||
io = ["dep:tokio"]
|
||||
path = []
|
||||
compress =["dep:flate2","dep:brotli","dep:snap","dep:lz4","dep:zstd"]
|
||||
string = ["dep:regex","dep:lazy_static"]
|
||||
crypto = ["dep:base64-simd","dep:hex-simd"]
|
||||
hash = ["dep:highway", "dep:md-5", "dep:sha2", "dep:blake3", "dep:serde", "dep:siphasher"]
|
||||
os = ["dep:nix", "dep:tempfile", "winapi"] # operating system utilities
|
||||
integration = [] # integration test features
|
||||
full = ["ip", "tls", "net", "io","hash", "os", "integration","path","crypto", "string"] # all features
|
||||
full = ["ip", "tls", "net", "io","hash", "os", "integration","path","crypto", "string","compress"] # all features
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
use http::HeaderMap;
|
||||
use std::io::Write;
|
||||
use tokio::io;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
|
||||
pub enum CompressionAlgorithm {
|
||||
None,
|
||||
Gzip,
|
||||
#[default]
|
||||
Deflate,
|
||||
Zstd,
|
||||
#[default]
|
||||
Lz4,
|
||||
Brotli,
|
||||
Snappy,
|
||||
@@ -16,6 +16,7 @@ pub enum CompressionAlgorithm {
|
||||
impl CompressionAlgorithm {
|
||||
pub fn as_str(&self) -> &str {
|
||||
match self {
|
||||
CompressionAlgorithm::None => "none",
|
||||
CompressionAlgorithm::Gzip => "gzip",
|
||||
CompressionAlgorithm::Deflate => "deflate",
|
||||
CompressionAlgorithm::Zstd => "zstd",
|
||||
@@ -42,10 +43,8 @@ impl std::str::FromStr for CompressionAlgorithm {
|
||||
"lz4" => Ok(CompressionAlgorithm::Lz4),
|
||||
"brotli" => Ok(CompressionAlgorithm::Brotli),
|
||||
"snappy" => Ok(CompressionAlgorithm::Snappy),
|
||||
_ => Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!("Unsupported compression algorithm: {}", s),
|
||||
)),
|
||||
"none" => Ok(CompressionAlgorithm::None),
|
||||
_ => Err(std::io::Error::other(format!("Unsupported compression algorithm: {}", s))),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -88,6 +87,7 @@ pub fn compress_block(input: &[u8], algorithm: CompressionAlgorithm) -> Vec<u8>
|
||||
let _ = encoder.write_all(input);
|
||||
encoder.into_inner().unwrap_or_default()
|
||||
}
|
||||
CompressionAlgorithm::None => input.to_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -129,20 +129,15 @@ pub fn decompress_block(compressed: &[u8], algorithm: CompressionAlgorithm) -> i
|
||||
std::io::Read::read_to_end(&mut decoder, &mut out)?;
|
||||
Ok(out)
|
||||
}
|
||||
CompressionAlgorithm::None => Ok(Vec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub const MIN_COMPRESSIBLE_SIZE: i64 = 4096;
|
||||
|
||||
pub fn is_compressible(_headers: &HeaderMap) -> bool {
|
||||
// TODO: Implement this function
|
||||
false
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::str::FromStr;
|
||||
use std::time::Instant;
|
||||
|
||||
#[test]
|
||||
fn test_compress_decompress_gzip() {
|
||||
@@ -267,4 +262,57 @@ mod tests {
|
||||
&& !snappy.is_empty()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compression_benchmark() {
|
||||
let sizes = [128 * 1024, 512 * 1024, 1024 * 1024];
|
||||
let algorithms = [
|
||||
CompressionAlgorithm::Gzip,
|
||||
CompressionAlgorithm::Deflate,
|
||||
CompressionAlgorithm::Zstd,
|
||||
CompressionAlgorithm::Lz4,
|
||||
CompressionAlgorithm::Brotli,
|
||||
CompressionAlgorithm::Snappy,
|
||||
];
|
||||
|
||||
println!("\n压缩算法基准测试结果:");
|
||||
println!(
|
||||
"{:<10} {:<10} {:<15} {:<15} {:<15}",
|
||||
"数据大小", "算法", "压缩时间(ms)", "压缩后大小", "压缩率"
|
||||
);
|
||||
|
||||
for size in sizes {
|
||||
// Generate compressible data (a repeated text pattern)
|
||||
let pattern = b"Hello, this is a test pattern that will be repeated multiple times to create compressible data. ";
|
||||
let data: Vec<u8> = pattern.iter().cycle().take(size).copied().collect();
|
||||
|
||||
for algo in algorithms {
|
||||
// Compression pass
|
||||
let start = Instant::now();
|
||||
let compressed = compress_block(&data, algo);
|
||||
let compress_time = start.elapsed();
|
||||
|
||||
// Decompression pass
|
||||
let start = Instant::now();
|
||||
let _decompressed = decompress_block(&compressed, algo).unwrap();
|
||||
let _decompress_time = start.elapsed();
|
||||
|
||||
// Compute the compression ratio
|
||||
let compression_ratio = (size as f64 / compressed.len() as f64) as f32;
|
||||
|
||||
println!(
|
||||
"{:<10} {:<10} {:<15.2} {:<15} {:<15.2}x",
|
||||
format!("{}KB", size / 1024),
|
||||
algo.as_str(),
|
||||
compress_time.as_secs_f64() * 1000.0,
|
||||
compressed.len(),
|
||||
compression_ratio
|
||||
);
|
||||
|
||||
// Verify the decompressed output
|
||||
assert_eq!(_decompressed, data);
|
||||
}
|
||||
println!(); // blank line to separate results for different sizes
|
||||
}
|
||||
}
|
||||
}
|
||||
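Since `compress_block` and `decompress_block` are symmetric, a round trip through any supported algorithm should reproduce the input. A minimal sketch using the signatures from this hunk, assuming the `compress` feature of `rustfs-utils` is enabled (the test payload is illustrative):

```rust
use rustfs_utils::compress::{CompressionAlgorithm, compress_block, decompress_block};
use std::str::FromStr;

fn round_trip() -> std::io::Result<()> {
    // Algorithm names match CompressionAlgorithm::from_str ("gzip", "zstd", "lz4", ...).
    let algo = CompressionAlgorithm::from_str("zstd")?;
    let data = b"Hello, compression! ".repeat(512);

    let compressed = compress_block(&data, algo);
    let restored = decompress_block(&compressed, algo)?;
    assert_eq!(restored, data);
    Ok(())
}
```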
@@ -25,6 +25,9 @@ pub mod string;
|
||||
#[cfg(feature = "crypto")]
|
||||
pub mod crypto;
|
||||
|
||||
#[cfg(feature = "compress")]
|
||||
pub mod compress;
|
||||
|
||||
#[cfg(feature = "tls")]
|
||||
pub use certs::*;
|
||||
#[cfg(feature = "hash")]
|
||||
@@ -36,3 +39,6 @@ pub use ip::*;
|
||||
|
||||
#[cfg(feature = "crypto")]
|
||||
pub use crypto::*;
|
||||
|
||||
#[cfg(feature = "compress")]
|
||||
pub use compress::*;
|
||||
|
||||
@@ -32,6 +32,29 @@ pub fn match_pattern(pattern: &str, name: &str) -> bool {
|
||||
deep_match_rune(name.as_bytes(), pattern.as_bytes(), false)
|
||||
}
|
||||
|
||||
pub fn has_pattern(patterns: &[&str], match_str: &str) -> bool {
|
||||
for pattern in patterns {
|
||||
if match_simple(pattern, match_str) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
pub fn has_string_suffix_in_slice(str: &str, list: &[&str]) -> bool {
|
||||
let str = str.to_lowercase();
|
||||
for v in list {
|
||||
if *v == "*" {
|
||||
return true;
|
||||
}
|
||||
|
||||
if str.ends_with(&v.to_lowercase()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn deep_match_rune(str_: &[u8], pattern: &[u8], simple: bool) -> bool {
|
||||
let (mut str_, mut pattern) = (str_, pattern);
|
||||
while !pattern.is_empty() {
|
||||
|
||||
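These two helpers back the new compression gate further down: `has_pattern` runs each pattern through `match_simple`, and `has_string_suffix_in_slice` does a case-insensitive suffix check with `"*"` as a catch-all. A hedged illustration of the expected behavior, assuming `match_simple` treats `*` as a wildcard as the content-type exclusion list implies (the literal inputs are examples only):

```rust
use rustfs_utils::string::{has_pattern, has_string_suffix_in_slice};

fn main() {
    // Wildcard content-type patterns, as used by the compression exclusion list.
    assert!(has_pattern(&["video/*", "application/zip"], "video/mp4"));

    // Case-insensitive suffix match; "*" matches any name.
    assert!(has_string_suffix_in_slice("archive.ZIP", &[".gz", ".zip"]));
    assert!(has_string_suffix_in_slice("anything", &["*"]));
}
```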
@@ -91,6 +91,7 @@ winapi = { workspace = true }
|
||||
[dev-dependencies]
|
||||
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
|
||||
criterion = { version = "0.5", features = ["html_reports"] }
|
||||
temp-env = "0.2.0"
|
||||
|
||||
[build-dependencies]
|
||||
shadow-rs = { workspace = true, features = ["build", "metadata"] }
|
||||
|
||||
@@ -68,14 +68,20 @@ pub async fn create_bitrot_writer(
|
||||
disk: Option<&DiskStore>,
|
||||
volume: &str,
|
||||
path: &str,
|
||||
length: usize,
|
||||
length: i64,
|
||||
shard_size: usize,
|
||||
checksum_algo: HashAlgorithm,
|
||||
) -> disk::error::Result<BitrotWriterWrapper> {
|
||||
let writer = if is_inline_buffer {
|
||||
CustomWriter::new_inline_buffer()
|
||||
} else if let Some(disk) = disk {
|
||||
let length = length.div_ceil(shard_size) * checksum_algo.size() + length;
|
||||
let length = if length > 0 {
|
||||
let length = length as usize;
|
||||
(length.div_ceil(shard_size) * checksum_algo.size() + length) as i64
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let file = disk.create_file("", volume, path, length).await?;
|
||||
CustomWriter::new_tokio_writer(file)
|
||||
} else {
|
||||
|
||||
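The new length handling only adds checksum overhead when a real size is known; zero or negative lengths fall through as 0. As a worked example with illustrative figures (1 MiB payload, 64 KiB shards, 32-byte checksums), 16 shards add 512 bytes on top of the payload:

```rust
fn main() {
    // Illustrative figures only: 1 MiB payload, 64 KiB shards, 32-byte checksums.
    let (length, shard_size, checksum_len) = (1_048_576usize, 65_536usize, 32usize);
    let on_disk = length.div_ceil(shard_size) * checksum_len + length;
    assert_eq!(on_disk, 1_049_088); // 16 shards * 32 checksum bytes + payload
}
```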
@@ -443,7 +443,6 @@ impl BucketMetadataSys {
|
||||
let bm = match self.get_config(bucket).await {
|
||||
Ok((res, _)) => res,
|
||||
Err(err) => {
|
||||
warn!("get_object_lock_config err {:?}", &err);
|
||||
return if err == Error::ConfigNotFound {
|
||||
Err(BucketMetadataError::BucketObjectLockConfigNotFound.into())
|
||||
} else {
|
||||
|
||||
@@ -511,8 +511,8 @@ pub async fn get_heal_replicate_object_info(
|
||||
|
||||
let mut result = ReplicateObjectInfo {
|
||||
name: oi.name.clone(),
|
||||
size: oi.size as i64,
|
||||
actual_size: asz as i64,
|
||||
size: oi.size,
|
||||
actual_size: asz,
|
||||
bucket: oi.bucket.clone(),
|
||||
//version_id: oi.version_id.clone(),
|
||||
version_id: oi
|
||||
@@ -814,8 +814,8 @@ impl ReplicationPool {
|
||||
vsender.pop(); // Dropping the sender will close the channel
|
||||
}
|
||||
self.workers_sender = vsender;
|
||||
warn!("self sender size is {:?}", self.workers_sender.len());
|
||||
warn!("self sender size is {:?}", self.workers_sender.len());
|
||||
// warn!("self sender size is {:?}", self.workers_sender.len());
|
||||
// warn!("self sender size is {:?}", self.workers_sender.len());
|
||||
}
|
||||
|
||||
async fn resize_failed_workers(&self, _count: usize) {
|
||||
@@ -1758,13 +1758,13 @@ pub async fn schedule_replication(oi: ObjectInfo, o: Arc<store::ECStore>, dsc: R
|
||||
let replication_timestamp = Utc::now(); // Placeholder for timestamp parsing
|
||||
let replication_state = oi.replication_state();
|
||||
|
||||
let actual_size = oi.actual_size.unwrap_or(0);
|
||||
let actual_size = oi.actual_size;
|
||||
//let ssec = oi.user_defined.contains_key("ssec");
|
||||
let ssec = false;
|
||||
|
||||
let ri = ReplicateObjectInfo {
|
||||
name: oi.name,
|
||||
size: oi.size as i64,
|
||||
size: oi.size,
|
||||
bucket: oi.bucket,
|
||||
version_id: oi
|
||||
.version_id
|
||||
@@ -2018,8 +2018,8 @@ impl ReplicateObjectInfo {
|
||||
mod_time: Some(
|
||||
OffsetDateTime::from_unix_timestamp(self.mod_time.timestamp()).unwrap_or_else(|_| OffsetDateTime::now_utc()),
|
||||
),
|
||||
size: self.size as usize,
|
||||
actual_size: Some(self.actual_size as usize),
|
||||
size: self.size,
|
||||
actual_size: self.actual_size,
|
||||
is_dir: false,
|
||||
user_defined: None, // can be filled in from elsewhere as needed
|
||||
parity_blocks: 0,
|
||||
@@ -2317,7 +2317,7 @@ impl ReplicateObjectInfo {
|
||||
|
||||
// 设置对象大小
|
||||
//rinfo.size = object_info.actual_size.unwrap_or(0);
|
||||
rinfo.size = object_info.actual_size.map_or(0, |v| v as i64);
|
||||
rinfo.size = object_info.actual_size;
|
||||
//rinfo.replication_action = object_info.
|
||||
|
||||
rinfo.replication_status = ReplicationStatusType::Completed;
|
||||
|
||||
115
ecstore/src/compress.rs
Normal file
@@ -0,0 +1,115 @@
use rustfs_utils::string::has_pattern;
use rustfs_utils::string::has_string_suffix_in_slice;
use std::env;
use tracing::error;

pub const MIN_COMPRESSIBLE_SIZE: usize = 4096;

// Environment variable that controls whether compression is enabled
pub const ENV_COMPRESSION_ENABLED: &str = "RUSTFS_COMPRESSION_ENABLED";

// Some standard object extensions which we strictly dis-allow for compression.
pub const STANDARD_EXCLUDE_COMPRESS_EXTENSIONS: &[&str] = &[
    ".gz", ".bz2", ".rar", ".zip", ".7z", ".xz", ".mp4", ".mkv", ".mov", ".jpg", ".png", ".gif",
];

// Some standard content-types which we strictly dis-allow for compression.
pub const STANDARD_EXCLUDE_COMPRESS_CONTENT_TYPES: &[&str] = &[
    "video/*",
    "audio/*",
    "application/zip",
    "application/x-gzip",
    "application/x-zip-compressed",
    "application/x-compress",
    "application/x-spoon",
];

pub fn is_compressible(headers: &http::HeaderMap, object_name: &str) -> bool {
    // Check whether compression is enabled via the environment variable; disabled by default
    if let Ok(compression_enabled) = env::var(ENV_COMPRESSION_ENABLED) {
        if compression_enabled.to_lowercase() != "true" {
            error!("Compression is disabled by environment variable");
            return false;
        }
    } else {
        // Disabled by default when the environment variable is not set
        return false;
    }

    let content_type = headers.get("content-type").and_then(|s| s.to_str().ok()).unwrap_or("");

    // TODO: crypto request return false

    if has_string_suffix_in_slice(object_name, STANDARD_EXCLUDE_COMPRESS_EXTENSIONS) {
        error!("object_name: {} is not compressible", object_name);
        return false;
    }

    if !content_type.is_empty() && has_pattern(STANDARD_EXCLUDE_COMPRESS_CONTENT_TYPES, content_type) {
        error!("content_type: {} is not compressible", content_type);
        return false;
    }
    true

    // TODO: check from config
}

#[cfg(test)]
mod tests {
    use super::*;
    use temp_env;

    #[test]
    fn test_is_compressible() {
        use http::HeaderMap;

        let headers = HeaderMap::new();

        // Control via the environment variable
        temp_env::with_var(ENV_COMPRESSION_ENABLED, Some("false"), || {
            assert!(!is_compressible(&headers, "file.txt"));
        });

        temp_env::with_var(ENV_COMPRESSION_ENABLED, Some("true"), || {
            assert!(is_compressible(&headers, "file.txt"));
        });

        temp_env::with_var_unset(ENV_COMPRESSION_ENABLED, || {
            assert!(!is_compressible(&headers, "file.txt"));
        });

        temp_env::with_var(ENV_COMPRESSION_ENABLED, Some("true"), || {
            let mut headers = HeaderMap::new();
            // Non-compressible extensions
            headers.insert("content-type", "text/plain".parse().unwrap());
            assert!(!is_compressible(&headers, "file.gz"));
            assert!(!is_compressible(&headers, "file.zip"));
            assert!(!is_compressible(&headers, "file.mp4"));
            assert!(!is_compressible(&headers, "file.jpg"));

            // Non-compressible content types
            headers.insert("content-type", "video/mp4".parse().unwrap());
            assert!(!is_compressible(&headers, "file.txt"));

            headers.insert("content-type", "audio/mpeg".parse().unwrap());
            assert!(!is_compressible(&headers, "file.txt"));

            headers.insert("content-type", "application/zip".parse().unwrap());
            assert!(!is_compressible(&headers, "file.txt"));

            headers.insert("content-type", "application/x-gzip".parse().unwrap());
            assert!(!is_compressible(&headers, "file.txt"));

            // Compressible cases
            headers.insert("content-type", "text/plain".parse().unwrap());
            assert!(is_compressible(&headers, "file.txt"));
            assert!(is_compressible(&headers, "file.log"));

            headers.insert("content-type", "text/html".parse().unwrap());
            assert!(is_compressible(&headers, "file.html"));

            headers.insert("content-type", "application/json".parse().unwrap());
            assert!(is_compressible(&headers, "file.json"));
        });
    }
}
|
||||
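The gate is opt-in: without `RUSTFS_COMPRESSION_ENABLED=true` it always returns `false`, and even when enabled, known pre-compressed extensions and media content types are skipped. A hedged sketch of how a caller inside the `ecstore` crate might combine it with `MIN_COMPRESSIBLE_SIZE` (the helper name and the size-threshold policy are illustrative; the patch itself still leaves per-config checks as a TODO):

```rust
use crate::compress::{MIN_COMPRESSIBLE_SIZE, is_compressible};
use http::HeaderMap;

// Only compress when the feature is enabled, the object is not excluded,
// and the payload is large enough to be worth it.
fn should_compress(headers: &HeaderMap, object_name: &str, size: usize) -> bool {
    size >= MIN_COMPRESSIBLE_SIZE && is_compressible(headers, object_name)
}
```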
@@ -93,17 +93,11 @@ pub async fn delete_config<S: StorageAPI>(api: Arc<S>, file: &str) -> Result<()>
|
||||
}
|
||||
|
||||
pub async fn save_config_with_opts<S: StorageAPI>(api: Arc<S>, file: &str, data: Vec<u8>, opts: &ObjectOptions) -> Result<()> {
|
||||
warn!(
|
||||
"save_config_with_opts, bucket: {}, file: {}, data len: {}",
|
||||
RUSTFS_META_BUCKET,
|
||||
file,
|
||||
data.len()
|
||||
);
|
||||
if let Err(err) = api
|
||||
.put_object(RUSTFS_META_BUCKET, file, &mut PutObjReader::from_vec(data), opts)
|
||||
.await
|
||||
{
|
||||
warn!("save_config_with_opts: err: {:?}, file: {}", err, file);
|
||||
error!("save_config_with_opts: err: {:?}, file: {}", err, file);
|
||||
return Err(err);
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -112,7 +112,13 @@ impl Config {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn should_inline(&self, shard_size: usize, versioned: bool) -> bool {
|
||||
pub fn should_inline(&self, shard_size: i64, versioned: bool) -> bool {
|
||||
if shard_size < 0 {
|
||||
return false;
|
||||
}
|
||||
|
||||
let shard_size = shard_size as usize;
|
||||
|
||||
let mut inline_block = DEFAULT_INLINE_BLOCK;
|
||||
if self.initialized {
|
||||
inline_block = self.inline_block;
|
||||
|
||||
@@ -773,7 +773,7 @@ impl LocalDisk {
|
||||
Ok(res) => res,
|
||||
Err(e) => {
|
||||
if e != DiskError::VolumeNotFound && e != Error::FileNotFound {
|
||||
warn!("scan list_dir {}, err {:?}", ¤t, &e);
|
||||
debug!("scan list_dir {}, err {:?}", ¤t, &e);
|
||||
}
|
||||
|
||||
if opts.report_notfound && e == Error::FileNotFound && current == &opts.base_dir {
|
||||
@@ -785,7 +785,6 @@ impl LocalDisk {
|
||||
};
|
||||
|
||||
if entries.is_empty() {
|
||||
warn!("scan list_dir {}, entries is empty", ¤t);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -801,7 +800,6 @@ impl LocalDisk {
|
||||
let entry = item.clone();
|
||||
// check limit
|
||||
if opts.limit > 0 && *objs_returned >= opts.limit {
|
||||
warn!("scan list_dir {}, limit reached", ¤t);
|
||||
return Ok(());
|
||||
}
|
||||
// check prefix
|
||||
@@ -1207,7 +1205,7 @@ impl DiskAPI for LocalDisk {
|
||||
let err = self
|
||||
.bitrot_verify(
|
||||
&part_path,
|
||||
erasure.shard_file_size(part.size),
|
||||
erasure.shard_file_size(part.size as i64) as usize,
|
||||
checksum_info.algorithm,
|
||||
&checksum_info.hash,
|
||||
erasure.shard_size(),
|
||||
@@ -1248,7 +1246,7 @@ impl DiskAPI for LocalDisk {
|
||||
resp.results[i] = CHECK_PART_FILE_NOT_FOUND;
|
||||
continue;
|
||||
}
|
||||
if (st.len() as usize) < fi.erasure.shard_file_size(part.size) {
|
||||
if (st.len() as i64) < fi.erasure.shard_file_size(part.size as i64) {
|
||||
resp.results[i] = CHECK_PART_FILE_CORRUPT;
|
||||
continue;
|
||||
}
|
||||
@@ -1400,7 +1398,7 @@ impl DiskAPI for LocalDisk {
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug", skip(self))]
|
||||
async fn create_file(&self, origvolume: &str, volume: &str, path: &str, _file_size: usize) -> Result<FileWriter> {
|
||||
async fn create_file(&self, origvolume: &str, volume: &str, path: &str, _file_size: i64) -> Result<FileWriter> {
|
||||
// warn!("disk create_file: origvolume: {}, volume: {}, path: {}", origvolume, volume, path);
|
||||
|
||||
if !origvolume.is_empty() {
|
||||
@@ -1574,11 +1572,6 @@ impl DiskAPI for LocalDisk {
|
||||
let mut current = opts.base_dir.clone();
|
||||
self.scan_dir(&mut current, &opts, &mut out, &mut objs_returned).await?;
|
||||
|
||||
warn!(
|
||||
"walk_dir: done, volume_dir: {:?}, base_dir: {}",
|
||||
volume_dir.to_string_lossy(),
|
||||
opts.base_dir
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -2239,7 +2232,7 @@ impl DiskAPI for LocalDisk {
|
||||
let mut obj_deleted = false;
|
||||
for info in obj_infos.iter() {
|
||||
let done = ScannerMetrics::time(ScannerMetric::ApplyVersion);
|
||||
let sz: usize;
|
||||
let sz: i64;
|
||||
(obj_deleted, sz) = item.apply_actions(info, &mut size_s).await;
|
||||
done();
|
||||
|
||||
@@ -2260,7 +2253,7 @@ impl DiskAPI for LocalDisk {
|
||||
size_s.versions += 1;
|
||||
}
|
||||
|
||||
size_s.total_size += sz;
|
||||
size_s.total_size += sz as usize;
|
||||
|
||||
if info.delete_marker {
|
||||
continue;
|
||||
|
||||
@@ -304,7 +304,7 @@ impl DiskAPI for Disk {
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip(self))]
|
||||
async fn create_file(&self, _origvolume: &str, volume: &str, path: &str, _file_size: usize) -> Result<FileWriter> {
|
||||
async fn create_file(&self, _origvolume: &str, volume: &str, path: &str, _file_size: i64) -> Result<FileWriter> {
|
||||
match self {
|
||||
Disk::Local(local_disk) => local_disk.create_file(_origvolume, volume, path, _file_size).await,
|
||||
Disk::Remote(remote_disk) => remote_disk.create_file(_origvolume, volume, path, _file_size).await,
|
||||
@@ -491,7 +491,7 @@ pub trait DiskAPI: Debug + Send + Sync + 'static {
|
||||
async fn read_file(&self, volume: &str, path: &str) -> Result<FileReader>;
|
||||
async fn read_file_stream(&self, volume: &str, path: &str, offset: usize, length: usize) -> Result<FileReader>;
|
||||
async fn append_file(&self, volume: &str, path: &str) -> Result<FileWriter>;
|
||||
async fn create_file(&self, origvolume: &str, volume: &str, path: &str, file_size: usize) -> Result<FileWriter>;
|
||||
async fn create_file(&self, origvolume: &str, volume: &str, path: &str, file_size: i64) -> Result<FileWriter>;
|
||||
// ReadFileStream
|
||||
async fn rename_file(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str) -> Result<()>;
|
||||
async fn rename_part(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str, meta: Bytes) -> Result<()>;
|
||||
|
||||
@@ -640,7 +640,7 @@ impl DiskAPI for RemoteDisk {
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug", skip(self))]
|
||||
async fn create_file(&self, _origvolume: &str, volume: &str, path: &str, file_size: usize) -> Result<FileWriter> {
|
||||
async fn create_file(&self, _origvolume: &str, volume: &str, path: &str, file_size: i64) -> Result<FileWriter> {
|
||||
info!("create_file {}/{}/{}", self.endpoint.to_string(), volume, path);
|
||||
|
||||
let url = format!(
|
||||
|
||||
@@ -30,7 +30,7 @@ where
|
||||
// Disk errors should be handled before the readers are passed in, ensuring each reader set has enough usable BitrotReaders
|
||||
pub fn new(readers: Vec<Option<BitrotReader<R>>>, e: Erasure, offset: usize, total_length: usize) -> Self {
|
||||
let shard_size = e.shard_size();
|
||||
let shard_file_size = e.shard_file_size(total_length);
|
||||
let shard_file_size = e.shard_file_size(total_length as i64) as usize;
|
||||
|
||||
let offset = (offset / e.block_size) * shard_size;
|
||||
|
||||
@@ -142,6 +142,7 @@ where
|
||||
W: tokio::io::AsyncWrite + Send + Sync + Unpin,
|
||||
{
|
||||
if get_data_block_len(en_blocks, data_blocks) < length {
|
||||
error!("write_data_blocks get_data_block_len < length");
|
||||
return Err(io::Error::new(ErrorKind::UnexpectedEof, "Not enough data blocks to write"));
|
||||
}
|
||||
|
||||
@@ -150,6 +151,7 @@ where
|
||||
|
||||
for block_op in &en_blocks[..data_blocks] {
|
||||
if block_op.is_none() {
|
||||
error!("write_data_blocks block_op.is_none()");
|
||||
return Err(io::Error::new(ErrorKind::UnexpectedEof, "Missing data block"));
|
||||
}
|
||||
|
||||
@@ -164,7 +166,10 @@ where
|
||||
offset = 0;
|
||||
|
||||
if write_left < block.len() {
|
||||
writer.write_all(&block_slice[..write_left]).await?;
|
||||
writer.write_all(&block_slice[..write_left]).await.map_err(|e| {
|
||||
error!("write_data_blocks write_all err: {}", e);
|
||||
e
|
||||
})?;
|
||||
|
||||
total_written += write_left;
|
||||
break;
|
||||
@@ -172,7 +177,10 @@ where
|
||||
|
||||
let n = block_slice.len();
|
||||
|
||||
writer.write_all(block_slice).await?;
|
||||
writer.write_all(block_slice).await.map_err(|e| {
|
||||
error!("write_data_blocks write_all2 err: {}", e);
|
||||
e
|
||||
})?;
|
||||
|
||||
write_left -= n;
|
||||
|
||||
@@ -228,6 +236,7 @@ impl Erasure {
|
||||
};
|
||||
|
||||
if block_length == 0 {
|
||||
// error!("erasure decode decode block_length == 0");
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@@ -469,22 +469,27 @@ impl Erasure {
|
||||
}
|
||||
/// Calculate the total erasure file size for a given original size.
|
||||
// Returns the final erasure size from the original size
|
||||
pub fn shard_file_size(&self, total_length: usize) -> usize {
|
||||
pub fn shard_file_size(&self, total_length: i64) -> i64 {
|
||||
if total_length == 0 {
|
||||
return 0;
|
||||
}
|
||||
if total_length < 0 {
|
||||
return total_length;
|
||||
}
|
||||
|
||||
let total_length = total_length as usize;
|
||||
|
||||
let num_shards = total_length / self.block_size;
|
||||
let last_block_size = total_length % self.block_size;
|
||||
let last_shard_size = calc_shard_size(last_block_size, self.data_shards);
|
||||
num_shards * self.shard_size() + last_shard_size
|
||||
(num_shards * self.shard_size() + last_shard_size) as i64
|
||||
}
|
||||
|
||||
/// Calculate the offset in the erasure file where reading begins.
|
||||
// Returns the offset in the erasure file where reading begins
|
||||
pub fn shard_file_offset(&self, start_offset: usize, length: usize, total_length: usize) -> usize {
|
||||
let shard_size = self.shard_size();
|
||||
let shard_file_size = self.shard_file_size(total_length);
|
||||
let shard_file_size = self.shard_file_size(total_length as i64) as usize;
|
||||
let end_shard = (start_offset + length) / self.block_size;
|
||||
let mut till_offset = end_shard * shard_size + shard_size;
|
||||
if till_offset > shard_file_size {
|
||||
|
||||
@@ -526,7 +526,7 @@ impl ScannerItem {
|
||||
cumulative_size += obj_info.size;
|
||||
}
|
||||
|
||||
if cumulative_size >= SCANNER_EXCESS_OBJECT_VERSIONS_TOTAL_SIZE.load(Ordering::SeqCst) as usize {
|
||||
if cumulative_size >= SCANNER_EXCESS_OBJECT_VERSIONS_TOTAL_SIZE.load(Ordering::SeqCst) as i64 {
|
||||
//todo
|
||||
}
|
||||
|
||||
@@ -558,7 +558,7 @@ impl ScannerItem {
|
||||
Ok(object_infos)
|
||||
}
|
||||
|
||||
pub async fn apply_actions(&mut self, oi: &ObjectInfo, _size_s: &mut SizeSummary) -> (bool, usize) {
|
||||
pub async fn apply_actions(&mut self, oi: &ObjectInfo, _size_s: &mut SizeSummary) -> (bool, i64) {
|
||||
let done = ScannerMetrics::time(ScannerMetric::Ilm);
|
||||
//todo: lifecycle
|
||||
info!(
|
||||
@@ -641,21 +641,21 @@ impl ScannerItem {
|
||||
match tgt_status {
|
||||
ReplicationStatusType::Pending => {
|
||||
tgt_size_s.pending_count += 1;
|
||||
tgt_size_s.pending_size += oi.size;
|
||||
tgt_size_s.pending_size += oi.size as usize;
|
||||
size_s.pending_count += 1;
|
||||
size_s.pending_size += oi.size;
|
||||
size_s.pending_size += oi.size as usize;
|
||||
}
|
||||
ReplicationStatusType::Failed => {
|
||||
tgt_size_s.failed_count += 1;
|
||||
tgt_size_s.failed_size += oi.size;
|
||||
tgt_size_s.failed_size += oi.size as usize;
|
||||
size_s.failed_count += 1;
|
||||
size_s.failed_size += oi.size;
|
||||
size_s.failed_size += oi.size as usize;
|
||||
}
|
||||
ReplicationStatusType::Completed | ReplicationStatusType::CompletedLegacy => {
|
||||
tgt_size_s.replicated_count += 1;
|
||||
tgt_size_s.replicated_size += oi.size;
|
||||
tgt_size_s.replicated_size += oi.size as usize;
|
||||
size_s.replicated_count += 1;
|
||||
size_s.replicated_size += oi.size;
|
||||
size_s.replicated_size += oi.size as usize;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
@@ -663,7 +663,7 @@ impl ScannerItem {
|
||||
|
||||
if matches!(oi.replication_status, ReplicationStatusType::Replica) {
|
||||
size_s.replica_count += 1;
|
||||
size_s.replica_size += oi.size;
|
||||
size_s.replica_size += oi.size as usize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ pub mod bucket;
|
||||
pub mod cache_value;
|
||||
mod chunk_stream;
|
||||
pub mod cmd;
|
||||
pub mod compress;
|
||||
pub mod config;
|
||||
pub mod disk;
|
||||
pub mod disks_layout;
|
||||
|
||||
@@ -24,7 +24,7 @@ use futures::future::BoxFuture;
use http::HeaderMap;
use rmp_serde::{Deserializer, Serializer};
use rustfs_filemeta::{MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams};
use rustfs_rio::HashReader;
use rustfs_rio::{HashReader, WarpReader};
use rustfs_utils::path::{SLASH_SEPARATOR, encode_dir_object, path_join};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
@@ -33,7 +33,7 @@ use std::io::{Cursor, Write};
use std::path::PathBuf;
use std::sync::Arc;
use time::{Duration, OffsetDateTime};
use tokio::io::AsyncReadExt;
use tokio::io::{AsyncReadExt, BufReader};
use tokio::sync::broadcast::Receiver as B_Receiver;
use tracing::{error, info, warn};
@@ -1254,6 +1254,7 @@ impl ECStore {
}

if let Err(err) = self
.clone()
.complete_multipart_upload(
&bucket,
&object_info.name,
@@ -1275,10 +1276,9 @@ impl ECStore {
return Ok(());
}

let mut data = PutObjReader::new(
HashReader::new(rd.stream, object_info.size as i64, object_info.size as i64, None, false)?,
object_info.size,
);
let reader = BufReader::new(rd.stream);
let hrd = HashReader::new(Box::new(WarpReader::new(reader)), object_info.size, object_info.size, None, false)?;
let mut data = PutObjReader::new(hrd);

if let Err(err) = self
.put_object(
@@ -12,13 +12,13 @@ use crate::store_api::{CompletePart, GetObjectReader, ObjectIO, ObjectOptions, P
use common::defer;
use http::HeaderMap;
use rustfs_filemeta::{FileInfo, MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams};
use rustfs_rio::HashReader;
use rustfs_rio::{HashReader, WarpReader};
use rustfs_utils::path::encode_dir_object;
use serde::{Deserialize, Serialize};
use std::io::Cursor;
use std::sync::Arc;
use time::OffsetDateTime;
use tokio::io::AsyncReadExt;
use tokio::io::{AsyncReadExt, BufReader};
use tokio::sync::broadcast::{self, Receiver as B_Receiver};
use tokio::time::{Duration, Instant};
use tracing::{error, info, warn};
@@ -62,7 +62,7 @@ impl RebalanceStats {

self.num_versions += 1;
let on_disk_size = if !fi.deleted {
fi.size as i64 * (fi.erasure.data_blocks + fi.erasure.parity_blocks) as i64 / fi.erasure.data_blocks as i64
fi.size * (fi.erasure.data_blocks + fi.erasure.parity_blocks) as i64 / fi.erasure.data_blocks as i64
} else {
0
};
@@ -703,7 +703,7 @@ impl ECStore {
#[allow(unused_assignments)]
#[tracing::instrument(skip(self, set))]
async fn rebalance_entry(
&self,
self: Arc<Self>,
bucket: String,
pool_index: usize,
entry: MetaCacheEntry,
@@ -834,7 +834,7 @@ impl ECStore {
}
};

if let Err(err) = self.rebalance_object(pool_index, bucket.clone(), rd).await {
if let Err(err) = self.clone().rebalance_object(pool_index, bucket.clone(), rd).await {
if is_err_object_not_found(&err) || is_err_version_not_found(&err) || is_err_data_movement_overwrite(&err) {
ignore = true;
warn!("rebalance_entry {} Entry {} is already deleted, skipping", &bucket, version.name);
@@ -890,7 +890,7 @@ impl ECStore {
}

#[tracing::instrument(skip(self, rd))]
async fn rebalance_object(&self, pool_idx: usize, bucket: String, rd: GetObjectReader) -> Result<()> {
async fn rebalance_object(self: Arc<Self>, pool_idx: usize, bucket: String, rd: GetObjectReader) -> Result<()> {
let object_info = rd.object_info.clone();

// TODO: check : use size or actual_size ?
@@ -969,6 +969,7 @@ impl ECStore {
}

if let Err(err) = self
.clone()
.complete_multipart_upload(
&bucket,
&object_info.name,
@@ -989,8 +990,9 @@ impl ECStore {
return Ok(());
}

let hrd = HashReader::new(rd.stream, object_info.size as i64, object_info.size as i64, None, false)?;
let mut data = PutObjReader::new(hrd, object_info.size);
let reader = BufReader::new(rd.stream);
let hrd = HashReader::new(Box::new(WarpReader::new(reader)), object_info.size, object_info.size, None, false)?;
let mut data = PutObjReader::new(hrd);

if let Err(err) = self
.put_object(
@@ -55,13 +55,14 @@ use lock::{LockApi, namespace_lock::NsLockMap};
use madmin::heal_commands::{HealDriveInfo, HealResultItem};
use md5::{Digest as Md5Digest, Md5};
use rand::{Rng, seq::SliceRandom};
use rustfs_filemeta::headers::RESERVED_METADATA_PREFIX_LOWER;
use rustfs_filemeta::{
FileInfo, FileMeta, FileMetaShallowVersion, MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams, ObjectPartInfo,
RawFileInfo, file_info_from_raw,
headers::{AMZ_OBJECT_TAGGING, AMZ_STORAGE_CLASS},
merge_file_meta_versions,
};
use rustfs_rio::{EtagResolvable, HashReader};
use rustfs_rio::{EtagResolvable, HashReader, TryGetIndex as _, WarpReader};
use rustfs_utils::{
HashAlgorithm,
crypto::{base64_decode, base64_encode, hex},
@@ -860,7 +861,8 @@ impl SetDisks {
};

if let Some(err) = reduce_read_quorum_errs(errs, OBJECT_OP_IGNORED_ERRS, expected_rquorum) {
error!("object_quorum_from_meta: {:?}, errs={:?}", err, errs);
// let object = parts_metadata.first().map(|v| v.name.clone()).unwrap_or_default();
// error!("object_quorum_from_meta: {:?}, errs={:?}, object={:?}", err, errs, object);
return Err(err);
}

@@ -1773,7 +1775,7 @@ impl SetDisks {
{
Ok(v) => v,
Err(e) => {
error!("Self::object_quorum_from_meta: {:?}, bucket: {}, object: {}", &e, bucket, object);
// error!("Self::object_quorum_from_meta: {:?}, bucket: {}, object: {}", &e, bucket, object);
return Err(e);
}
};
@@ -1817,7 +1819,7 @@ impl SetDisks {
bucket: &str,
object: &str,
offset: usize,
length: usize,
length: i64,
writer: &mut W,
fi: FileInfo,
files: Vec<FileInfo>,
@@ -1830,11 +1832,16 @@ impl SetDisks {
{
let (disks, files) = Self::shuffle_disks_and_parts_metadata_by_index(disks, &files, &fi);

let total_size = fi.size;
let total_size = fi.size as usize;

let length = { if length == 0 { total_size - offset } else { length } };
let length = if length < 0 {
fi.size as usize - offset
} else {
length as usize
};

if offset > total_size || offset + length > total_size {
error!("get_object_with_fileinfo offset out of range: {}, total_size: {}", offset, total_size);
return Err(Error::other("offset out of range"));
}

@@ -1852,11 +1859,6 @@ impl SetDisks {

let (last_part_index, _) = fi.to_part_offset(end_offset)?;

// debug!(
// "get_object_with_fileinfo end offset:{}, last_part_index:{},part_offset:{}",
// end_offset, last_part_index, 0
// );

// let erasure = Erasure::new(fi.erasure.data_blocks, fi.erasure.parity_blocks, fi.erasure.block_size);

let erasure = erasure_coding::Erasure::new(fi.erasure.data_blocks, fi.erasure.parity_blocks, fi.erasure.block_size);
@@ -1870,7 +1872,7 @@ impl SetDisks {
let part_number = fi.parts[i].number;
let part_size = fi.parts[i].size;
let mut part_length = part_size - part_offset;
if part_length > length - total_readed {
if part_length > (length - total_readed) {
part_length = length - total_readed
}

@@ -1912,7 +1914,7 @@ impl SetDisks {
error!("create_bitrot_reader reduce_read_quorum_errs {:?}", &errors);
return Err(to_object_err(read_err.into(), vec![bucket, object]));
}

error!("create_bitrot_reader not enough disks to read: {:?}", &errors);
return Err(Error::other(format!("not enough disks to read: {:?}", errors)));
}

@@ -2259,7 +2261,8 @@ impl SetDisks {
erasure_coding::Erasure::default()
};

result.object_size = ObjectInfo::from_file_info(&lastest_meta, bucket, object, true).get_actual_size()?;
result.object_size =
ObjectInfo::from_file_info(&lastest_meta, bucket, object, true).get_actual_size()? as usize;
// Loop to find number of disks with valid data, per-drive
// data state and a list of outdated disks on which data needs
// to be healed.
@@ -2521,7 +2524,7 @@ impl SetDisks {
disk.as_ref(),
RUSTFS_META_TMP_BUCKET,
&format!("{}/{}/part.{}", tmp_id, dst_data_dir, part.number),
erasure.shard_file_size(part.size),
erasure.shard_file_size(part.size as i64),
erasure.shard_size(),
HashAlgorithm::HighwayHash256,
)
@@ -2603,6 +2606,7 @@ impl SetDisks {
part.size,
part.mod_time,
part.actual_size,
part.index.clone(),
);
if is_inline_buffer {
if let Some(writer) = writers[index].take() {
@@ -2834,7 +2838,7 @@ impl SetDisks {
heal_item_type: HEAL_ITEM_OBJECT.to_string(),
bucket: bucket.to_string(),
object: object.to_string(),
object_size: lfi.size,
object_size: lfi.size as usize,
version_id: version_id.to_string(),
disk_count: disk_len,
..Default::default()
@@ -3500,7 +3504,7 @@ impl SetDisks {
if let (Some(started), Some(mod_time)) = (started, version.mod_time) {
if mod_time > started {
version_not_found += 1;
if send(heal_entry_skipped(version.size)).await {
if send(heal_entry_skipped(version.size as usize)).await {
defer.await;
return;
}
@@ -3544,10 +3548,10 @@ impl SetDisks {

if version_healed {
bg_seq.count_healed(HEAL_ITEM_OBJECT.to_string()).await;
result = heal_entry_success(version.size);
result = heal_entry_success(version.size as usize);
} else {
bg_seq.count_failed(HEAL_ITEM_OBJECT.to_string()).await;
result = heal_entry_failure(version.size);
result = heal_entry_failure(version.size as usize);
match version.version_id {
Some(version_id) => {
info!("unable to heal object {}/{}-v({})", bucket, version.name, version_id);
@@ -3863,7 +3867,7 @@ impl ObjectIO for SetDisks {

let is_inline_buffer = {
if let Some(sc) = GLOBAL_StorageClass.get() {
sc.should_inline(erasure.shard_file_size(data.content_length), opts.versioned)
sc.should_inline(erasure.shard_file_size(data.size()), opts.versioned)
} else {
false
}
@@ -3878,7 +3882,7 @@ impl ObjectIO for SetDisks {
Some(disk),
RUSTFS_META_TMP_BUCKET,
&tmp_object,
erasure.shard_file_size(data.content_length),
erasure.shard_file_size(data.size()),
erasure.shard_size(),
HashAlgorithm::HighwayHash256,
)
@@ -3924,7 +3928,10 @@ impl ObjectIO for SetDisks {
return Err(Error::other(format!("not enough disks to write: {:?}", errors)));
}

let stream = mem::replace(&mut data.stream, HashReader::new(Box::new(Cursor::new(Vec::new())), 0, 0, None, false)?);
let stream = mem::replace(
&mut data.stream,
HashReader::new(Box::new(WarpReader::new(Cursor::new(Vec::new()))), 0, 0, None, false)?,
);

let (reader, w_size) = match Arc::new(erasure).encode(stream, &mut writers, write_quorum).await {
Ok((r, w)) => (r, w),
@@ -3939,6 +3946,16 @@ impl ObjectIO for SetDisks {
// error!("close_bitrot_writers err {:?}", err);
// }

if (w_size as i64) < data.size() {
return Err(Error::other("put_object write size < data.size()"));
}

if user_defined.contains_key(&format!("{}compression", RESERVED_METADATA_PREFIX_LOWER)) {
user_defined.insert(format!("{}compression-size", RESERVED_METADATA_PREFIX_LOWER), w_size.to_string());
}

let index_op = data.stream.try_get_index().map(|v| v.clone().into_vec());

//TODO: userDefined

let etag = data.stream.try_resolve_etag().unwrap_or_default();
@@ -3949,6 +3966,14 @@ impl ObjectIO for SetDisks {
// get content-type
}

let mut actual_size = data.actual_size();
if actual_size < 0 {
let is_compressed = fi.is_compressed();
if !is_compressed {
actual_size = w_size as i64;
}
}

if let Some(sc) = user_defined.get(AMZ_STORAGE_CLASS) {
if sc == storageclass::STANDARD {
let _ = user_defined.remove(AMZ_STORAGE_CLASS);
@@ -3962,17 +3987,19 @@ impl ObjectIO for SetDisks {
if let Some(writer) = writers[i].take() {
fi.data = Some(writer.into_inline_data().map(bytes::Bytes::from).unwrap_or_default());
}

fi.set_inline_data();
}

fi.metadata = user_defined.clone();
fi.mod_time = Some(now);
fi.size = w_size;
fi.size = w_size as i64;
fi.versioned = opts.versioned || opts.version_suspended;
fi.add_object_part(1, etag.clone(), w_size, fi.mod_time, w_size);
fi.add_object_part(1, etag.clone(), w_size, fi.mod_time, actual_size, index_op.clone());

fi.set_inline_data();

// debug!("put_object fi {:?}", &fi)
if opts.data_movement {
fi.set_data_moved();
}
}

let (online_disks, _, op_old_dir) = Self::rename_data(
@@ -4566,7 +4593,7 @@ impl StorageAPI for SetDisks {
Some(disk),
RUSTFS_META_TMP_BUCKET,
&tmp_part_path,
erasure.shard_file_size(data.content_length),
erasure.shard_file_size(data.size()),
erasure.shard_size(),
HashAlgorithm::HighwayHash256,
)
@@ -4605,16 +4632,33 @@ impl StorageAPI for SetDisks {
return Err(Error::other(format!("not enough disks to write: {:?}", errors)));
}

let stream = mem::replace(&mut data.stream, HashReader::new(Box::new(Cursor::new(Vec::new())), 0, 0, None, false)?);
let stream = mem::replace(
&mut data.stream,
HashReader::new(Box::new(WarpReader::new(Cursor::new(Vec::new()))), 0, 0, None, false)?,
);

let (reader, w_size) = Arc::new(erasure).encode(stream, &mut writers, write_quorum).await?; // TODO: on error, delete the temporary directory

let _ = mem::replace(&mut data.stream, reader);

if (w_size as i64) < data.size() {
return Err(Error::other("put_object_part write size < data.size()"));
}

let index_op = data.stream.try_get_index().map(|v| v.clone().into_vec());

let mut etag = data.stream.try_resolve_etag().unwrap_or_default();

if let Some(ref tag) = opts.preserve_etag {
etag = tag.clone(); // TODO: verify that the etag matches
etag = tag.clone();
}

let mut actual_size = data.actual_size();
if actual_size < 0 {
let is_compressed = fi.is_compressed();
if !is_compressed {
actual_size = w_size as i64;
}
}

let part_info = ObjectPartInfo {
@@ -4622,7 +4666,8 @@ impl StorageAPI for SetDisks {
number: part_id,
size: w_size,
mod_time: Some(OffsetDateTime::now_utc()),
actual_size: data.content_length,
actual_size,
index: index_op,
..Default::default()
};

@@ -4649,6 +4694,7 @@ impl StorageAPI for SetDisks {
part_num: part_id,
last_mod: Some(OffsetDateTime::now_utc()),
size: w_size,
actual_size,
};

// error!("put_object_part ret {:?}", &ret);
@@ -4932,7 +4978,7 @@ impl StorageAPI for SetDisks {
// complete_multipart_upload finishes the multipart upload
#[tracing::instrument(skip(self))]
async fn complete_multipart_upload(
&self,
self: Arc<Self>,
bucket: &str,
object: &str,
upload_id: &str,
@@ -4974,12 +5020,15 @@ impl StorageAPI for SetDisks {
for (i, res) in part_files_resp.iter().enumerate() {
let part_id = uploaded_parts[i].part_num;
if !res.error.is_empty() || !res.exists {
// error!("complete_multipart_upload part_id err {:?}", res);
error!("complete_multipart_upload part_id err {:?}, exists={}", res, res.exists);
return Err(Error::InvalidPart(part_id, bucket.to_owned(), object.to_owned()));
}

let part_fi = FileInfo::unmarshal(&res.data).map_err(|_e| {
// error!("complete_multipart_upload FileInfo::unmarshal err {:?}", e);
let part_fi = FileInfo::unmarshal(&res.data).map_err(|e| {
error!(
"complete_multipart_upload FileInfo::unmarshal err {:?}, part_id={}, bucket={}, object={}",
e, part_id, bucket, object
);
Error::InvalidPart(part_id, bucket.to_owned(), object.to_owned())
})?;
let part = &part_fi.parts[0];
@@ -4989,11 +5038,18 @@ impl StorageAPI for SetDisks {
// debug!("complete part {} object info {:?}", part_num, &part);

if part_id != part_num {
// error!("complete_multipart_upload part_id err part_id != part_num {} != {}", part_id, part_num);
error!("complete_multipart_upload part_id err part_id != part_num {} != {}", part_id, part_num);
return Err(Error::InvalidPart(part_id, bucket.to_owned(), object.to_owned()));
}

fi.add_object_part(part.number, part.etag.clone(), part.size, part.mod_time, part.actual_size);
fi.add_object_part(
part.number,
part.etag.clone(),
part.size,
part.mod_time,
part.actual_size,
part.index.clone(),
);
}

let (shuffle_disks, mut parts_metadatas) = Self::shuffle_disks_and_parts_metadata_by_index(&disks, &files_metas, &fi);
@@ -5003,24 +5059,35 @@ impl StorageAPI for SetDisks {
fi.parts = Vec::with_capacity(uploaded_parts.len());

let mut object_size: usize = 0;
let mut object_actual_size: usize = 0;
let mut object_actual_size: i64 = 0;

for (i, p) in uploaded_parts.iter().enumerate() {
let has_part = curr_fi.parts.iter().find(|v| v.number == p.part_num);
if has_part.is_none() {
// error!("complete_multipart_upload has_part.is_none() {:?}", has_part);
error!(
"complete_multipart_upload has_part.is_none() {:?}, part_id={}, bucket={}, object={}",
has_part, p.part_num, bucket, object
);
return Err(Error::InvalidPart(p.part_num, "".to_owned(), p.etag.clone().unwrap_or_default()));
}

let ext_part = &curr_fi.parts[i];

if p.etag != Some(ext_part.etag.clone()) {
error!(
"complete_multipart_upload etag err {:?}, part_id={}, bucket={}, object={}",
p.etag, p.part_num, bucket, object
);
return Err(Error::InvalidPart(p.part_num, ext_part.etag.clone(), p.etag.clone().unwrap_or_default()));
}

// TODO: crypto

if (i < uploaded_parts.len() - 1) && !is_min_allowed_part_size(ext_part.size) {
if (i < uploaded_parts.len() - 1) && !is_min_allowed_part_size(ext_part.actual_size) {
error!(
"complete_multipart_upload is_min_allowed_part_size err {:?}, part_id={}, bucket={}, object={}",
ext_part.actual_size, p.part_num, bucket, object
);
return Err(Error::InvalidPart(p.part_num, ext_part.etag.clone(), p.etag.clone().unwrap_or_default()));
}

@@ -5033,11 +5100,12 @@ impl StorageAPI for SetDisks {
size: ext_part.size,
mod_time: ext_part.mod_time,
actual_size: ext_part.actual_size,
index: ext_part.index.clone(),
..Default::default()
});
}

fi.size = object_size;
fi.size = object_size as i64;
fi.mod_time = opts.mod_time;
if fi.mod_time.is_none() {
fi.mod_time = Some(OffsetDateTime::now_utc());
@@ -5054,6 +5122,18 @@ impl StorageAPI for SetDisks {

fi.metadata.insert("etag".to_owned(), etag);

fi.metadata
.insert(format!("{}actual-size", RESERVED_METADATA_PREFIX_LOWER), object_actual_size.to_string());

if fi.is_compressed() {
fi.metadata
.insert(format!("{}compression-size", RESERVED_METADATA_PREFIX_LOWER), object_size.to_string());
}

if opts.data_movement {
fi.set_data_moved();
}

// TODO: object_actual_size
let _ = object_actual_size;
@@ -5125,17 +5205,6 @@ impl StorageAPI for SetDisks {
)
.await?;

for (i, op_disk) in online_disks.iter().enumerate() {
if let Some(disk) = op_disk {
if disk.is_online().await {
fi = parts_metadatas[i].clone();
break;
}
}
}

fi.is_latest = true;

// debug!("complete fileinfo {:?}", &fi);

// TODO: reduce_common_data_dir
@@ -5157,7 +5226,22 @@ impl StorageAPI for SetDisks {
.await;
}

let _ = self.delete_all(RUSTFS_META_MULTIPART_BUCKET, &upload_id_path).await;
let upload_id_path = upload_id_path.clone();
let store = self.clone();
let _cleanup_handle = tokio::spawn(async move {
let _ = store.delete_all(RUSTFS_META_MULTIPART_BUCKET, &upload_id_path).await;
});

for (i, op_disk) in online_disks.iter().enumerate() {
if let Some(disk) = op_disk {
if disk.is_online().await {
fi = parts_metadatas[i].clone();
break;
}
}
}

fi.is_latest = true;

Ok(ObjectInfo::from_file_info(&fi, bucket, object, opts.versioned || opts.version_suspended))
}
@@ -5517,7 +5601,7 @@ async fn disks_with_all_parts(
let verify_err = bitrot_verify(
Box::new(Cursor::new(data.clone())),
data_len,
meta.erasure.shard_file_size(meta.size),
meta.erasure.shard_file_size(meta.size) as usize,
checksum_info.algorithm,
checksum_info.hash,
meta.erasure.shard_size(),
@@ -5729,8 +5813,8 @@ pub async fn stat_all_dirs(disks: &[Option<DiskStore>], bucket: &str, prefix: &s
}

const GLOBAL_MIN_PART_SIZE: ByteSize = ByteSize::mib(5);
fn is_min_allowed_part_size(size: usize) -> bool {
size as u64 >= GLOBAL_MIN_PART_SIZE.as_u64()
fn is_min_allowed_part_size(size: i64) -> bool {
size >= GLOBAL_MIN_PART_SIZE.as_u64() as i64
}

fn get_complete_multipart_md5(parts: &[CompletePart]) -> String {
@@ -627,7 +627,7 @@ impl StorageAPI for Sets {

#[tracing::instrument(skip(self))]
async fn complete_multipart_upload(
&self,
self: Arc<Self>,
bucket: &str,
object: &str,
upload_id: &str,

@@ -1233,7 +1233,7 @@ impl ObjectIO for ECStore {
return self.pools[0].put_object(bucket, object.as_str(), data, opts).await;
}

let idx = self.get_pool_idx(bucket, &object, data.content_length as i64).await?;
let idx = self.get_pool_idx(bucket, &object, data.size()).await?;

if opts.data_movement && idx == opts.src_pool_idx {
return Err(StorageError::DataMovementOverwriteErr(
@@ -1508,9 +1508,7 @@ impl StorageAPI for ECStore {

// TODO: nslock

let pool_idx = self
.get_pool_idx_no_lock(src_bucket, &src_object, src_info.size as i64)
.await?;
let pool_idx = self.get_pool_idx_no_lock(src_bucket, &src_object, src_info.size).await?;

if cp_src_dst_same {
if let (Some(src_vid), Some(dst_vid)) = (&src_opts.version_id, &dst_opts.version_id) {
@@ -1995,7 +1993,7 @@ impl StorageAPI for ECStore {

#[tracing::instrument(skip(self))]
async fn complete_multipart_upload(
&self,
self: Arc<Self>,
bucket: &str,
object: &str,
upload_id: &str,
@@ -2006,6 +2004,7 @@ impl StorageAPI for ECStore {

if self.single_pool() {
return self.pools[0]
.clone()
.complete_multipart_upload(bucket, object, upload_id, uploaded_parts, opts)
.await;
}
@@ -2015,6 +2014,7 @@ impl StorageAPI for ECStore {
continue;
}

let pool = pool.clone();
let err = match pool
.complete_multipart_upload(bucket, object, upload_id, uploaded_parts.clone(), opts)
.await
@@ -7,24 +7,24 @@ use crate::store_utils::clean_metadata;
use crate::{disk::DiskStore, heal::heal_commands::HealOpts};
use http::{HeaderMap, HeaderValue};
use madmin::heal_commands::HealResultItem;
use rustfs_filemeta::headers::RESERVED_METADATA_PREFIX_LOWER;
use rustfs_filemeta::{FileInfo, MetaCacheEntriesSorted, ObjectPartInfo, headers::AMZ_OBJECT_TAGGING};
use rustfs_rio::{HashReader, Reader};
use rustfs_rio::{DecompressReader, HashReader, LimitReader, WarpReader};
use rustfs_utils::CompressionAlgorithm;
use rustfs_utils::path::decode_dir_object;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt::Debug;
use std::io::Cursor;
use std::str::FromStr as _;
use std::sync::Arc;
use time::OffsetDateTime;
use tokio::io::AsyncReadExt;
use tokio::io::{AsyncRead, AsyncReadExt};
use tracing::warn;
use uuid::Uuid;

pub const ERASURE_ALGORITHM: &str = "rs-vandermonde";
pub const BLOCK_SIZE_V2: usize = 1024 * 1024; // 1M
pub const RESERVED_METADATA_PREFIX: &str = "X-Rustfs-Internal-";
pub const RESERVED_METADATA_PREFIX_LOWER: &str = "x-rustfs-internal-";
pub const RUSTFS_HEALING: &str = "X-Rustfs-Internal-healing";
pub const RUSTFS_DATA_MOVE: &str = "X-Rustfs-Internal-data-mov";

#[derive(Debug, Default, Serialize, Deserialize)]
pub struct MakeBucketOptions {
@@ -53,46 +53,50 @@ pub struct DeleteBucketOptions {

pub struct PutObjReader {
pub stream: HashReader,
pub content_length: usize,
}

impl Debug for PutObjReader {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("PutObjReader")
.field("content_length", &self.content_length)
.finish()
f.debug_struct("PutObjReader").finish()
}
}

impl PutObjReader {
pub fn new(stream: HashReader, content_length: usize) -> Self {
PutObjReader { stream, content_length }
pub fn new(stream: HashReader) -> Self {
PutObjReader { stream }
}

pub fn from_vec(data: Vec<u8>) -> Self {
let content_length = data.len();
let content_length = data.len() as i64;
PutObjReader {
stream: HashReader::new(Box::new(Cursor::new(data)), content_length as i64, content_length as i64, None, false)
stream: HashReader::new(Box::new(WarpReader::new(Cursor::new(data))), content_length, content_length, None, false)
.unwrap(),
content_length,
}
}

pub fn size(&self) -> i64 {
self.stream.size()
}

pub fn actual_size(&self) -> i64 {
self.stream.actual_size()
}
}

pub struct GetObjectReader {
pub stream: Box<dyn Reader>,
pub stream: Box<dyn AsyncRead + Unpin + Send + Sync>,
pub object_info: ObjectInfo,
}

impl GetObjectReader {
#[tracing::instrument(level = "debug", skip(reader))]
pub fn new(
reader: Box<dyn Reader>,
reader: Box<dyn AsyncRead + Unpin + Send + Sync>,
rs: Option<HTTPRangeSpec>,
oi: &ObjectInfo,
opts: &ObjectOptions,
_h: &HeaderMap<HeaderValue>,
) -> Result<(Self, usize, usize)> {
) -> Result<(Self, usize, i64)> {
let mut rs = rs;

if let Some(part_number) = opts.part_number {
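With `content_length` gone, call sites derive both lengths from the HashReader itself; a minimal sketch of the new construction pattern (the `async_read`, `size`, and `actual_size` values here are placeholders, not code from the repository):

let reader = Box::new(WarpReader::new(async_read));                 // adapt any AsyncRead source
let hrd = HashReader::new(reader, size, actual_size, None, false)?; // size may be -1 when the stored length is unknown (e.g. compressed writes)
let mut data = PutObjReader::new(hrd);                              // data.size() / data.actual_size() replace the old content_length field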
@@ -101,6 +105,47 @@ impl GetObjectReader {
}
}

// TODO:Encrypted

let (algo, is_compressed) = oi.is_compressed_ok()?;

// TODO: check TRANSITION

if is_compressed {
let actual_size = oi.get_actual_size()?;
let (off, length) = (0, oi.size);
let (_dec_off, dec_length) = (0, actual_size);
if let Some(_rs) = rs {
// TODO: range spec is not supported for compressed object
return Err(Error::other("The requested range is not satisfiable"));
// let (off, length) = rs.get_offset_length(actual_size)?;
}

let dec_reader = DecompressReader::new(reader, algo);

let actual_size = if actual_size > 0 {
actual_size as usize
} else {
return Err(Error::other(format!("invalid decompressed size {}", actual_size)));
};

warn!("actual_size: {}", actual_size);
let dec_reader = LimitReader::new(dec_reader, actual_size);

let mut oi = oi.clone();
oi.size = dec_length;

warn!("oi.size: {}, off: {}, length: {}", oi.size, off, length);
return Ok((
GetObjectReader {
stream: Box::new(dec_reader),
object_info: oi,
},
off,
length,
));
}

if let Some(rs) = rs {
let (off, length) = rs.get_offset_length(oi.size)?;
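In short, the compressed read path added here layers the stream as follows (a summary of the hunk above, not additional repository code):

// erasure-decoded bytes -> DecompressReader(algo) -> LimitReader(actual_size) -> GetObjectReader.stream
// object_info.size is rewritten to the decompressed length before the reader is returned,
// and range requests against compressed objects are rejected for now.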
@@ -142,8 +187,8 @@ impl GetObjectReader {
#[derive(Debug)]
pub struct HTTPRangeSpec {
pub is_suffix_length: bool,
pub start: usize,
pub end: Option<usize>,
pub start: i64,
pub end: i64,
}

impl HTTPRangeSpec {
@@ -152,29 +197,38 @@ impl HTTPRangeSpec {
return None;
}

let mut start = 0;
let mut end = -1;
let mut start = 0i64;
let mut end = -1i64;
for i in 0..oi.parts.len().min(part_number) {
start = end + 1;
end = start + oi.parts[i].size as i64 - 1
end = start + (oi.parts[i].size as i64) - 1
}

Some(HTTPRangeSpec {
is_suffix_length: false,
start: start as usize,
end: { if end < 0 { None } else { Some(end as usize) } },
start,
end,
})
}

pub fn get_offset_length(&self, res_size: usize) -> Result<(usize, usize)> {
pub fn get_offset_length(&self, res_size: i64) -> Result<(usize, i64)> {
let len = self.get_length(res_size)?;

let mut start = self.start;
if self.is_suffix_length {
start = res_size - self.start
start = res_size + self.start;

if start < 0 {
start = 0;
}
}
Ok((start, len))
Ok((start as usize, len))
}
pub fn get_length(&self, res_size: usize) -> Result<usize> {
pub fn get_length(&self, res_size: i64) -> Result<i64> {
if res_size < 0 {
return Err(Error::other("The requested range is not satisfiable"));
}

if self.is_suffix_length {
let specified_len = self.start; // assuming self.start is an i64
let mut range_length = specified_len;
@@ -190,8 +244,8 @@ impl HTTPRangeSpec {
return Err(Error::other("The requested range is not satisfiable"));
}

if let Some(end) = self.end {
let mut end = end;
if self.end > -1 {
let mut end = self.end;
if res_size <= end {
end = res_size - 1;
}
@@ -200,7 +254,7 @@ impl HTTPRangeSpec {
return Ok(range_length);
}

if self.end.is_none() {
if self.end == -1 {
let range_length = res_size - self.start;
return Ok(range_length);
}
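For reference, the S3 handler hunks later in this diff map Range headers onto the new i64 fields like this (values shown are illustrative):

// "bytes=0-99"  -> HTTPRangeSpec { is_suffix_length: false, start: 0,   end: 99 }
// "bytes=500-"  -> HTTPRangeSpec { is_suffix_length: false, start: 500, end: -1 }  // -1 takes over the role of the old None
// "bytes=-100"  -> HTTPRangeSpec { is_suffix_length: true,  start: 100, end: -1 }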
@@ -276,6 +330,7 @@ pub struct PartInfo {
pub last_mod: Option<OffsetDateTime>,
pub size: usize,
pub etag: Option<String>,
pub actual_size: i64,
}

#[derive(Debug, Clone, Default)]
@@ -298,9 +353,9 @@ pub struct ObjectInfo {
pub bucket: String,
pub name: String,
pub mod_time: Option<OffsetDateTime>,
pub size: usize,
pub size: i64,
// Actual size is the real size of the object uploaded by client.
pub actual_size: Option<usize>,
pub actual_size: i64,
pub is_dir: bool,
pub user_defined: Option<HashMap<String, String>>,
pub parity_blocks: usize,
@@ -364,27 +419,41 @@ impl Clone for ObjectInfo {
impl ObjectInfo {
pub fn is_compressed(&self) -> bool {
if let Some(meta) = &self.user_defined {
meta.contains_key(&format!("{}compression", RESERVED_METADATA_PREFIX))
meta.contains_key(&format!("{}compression", RESERVED_METADATA_PREFIX_LOWER))
} else {
false
}
}

pub fn is_compressed_ok(&self) -> Result<(CompressionAlgorithm, bool)> {
let scheme = self
.user_defined
.as_ref()
.and_then(|meta| meta.get(&format!("{}compression", RESERVED_METADATA_PREFIX_LOWER)).cloned());

if let Some(scheme) = scheme {
let algorithm = CompressionAlgorithm::from_str(&scheme)?;
Ok((algorithm, true))
} else {
Ok((CompressionAlgorithm::None, false))
}
}

pub fn is_multipart(&self) -> bool {
self.etag.as_ref().is_some_and(|v| v.len() != 32)
}

pub fn get_actual_size(&self) -> std::io::Result<usize> {
if let Some(actual_size) = self.actual_size {
return Ok(actual_size);
pub fn get_actual_size(&self) -> std::io::Result<i64> {
if self.actual_size > 0 {
return Ok(self.actual_size);
}

if self.is_compressed() {
if let Some(meta) = &self.user_defined {
if let Some(size_str) = meta.get(&format!("{}actual-size", RESERVED_METADATA_PREFIX)) {
if let Some(size_str) = meta.get(&format!("{}actual-size", RESERVED_METADATA_PREFIX_LOWER)) {
if !size_str.is_empty() {
// Todo: deal with error
let size = size_str.parse::<usize>().map_err(|e| std::io::Error::other(e.to_string()))?;
let size = size_str.parse::<i64>().map_err(|e| std::io::Error::other(e.to_string()))?;
return Ok(size);
}
}
@@ -395,8 +464,9 @@ impl ObjectInfo {
actual_size += part.actual_size;
});
if actual_size == 0 && actual_size != self.size {
return Err(std::io::Error::other("invalid decompressed size"));
return Err(std::io::Error::other(format!("invalid decompressed size {} {}", actual_size, self.size)));
}

return Ok(actual_size);
}
@@ -803,7 +873,7 @@ pub trait StorageAPI: ObjectIO {
// ListObjectParts
async fn abort_multipart_upload(&self, bucket: &str, object: &str, upload_id: &str, opts: &ObjectOptions) -> Result<()>;
async fn complete_multipart_upload(
&self,
self: Arc<Self>,
bucket: &str,
object: &str,
upload_id: &str,

@@ -164,7 +164,7 @@ pub struct PutFileQuery {
volume: String,
path: String,
append: bool,
size: usize,
size: i64,
}
pub struct PutFile {}
#[async_trait::async_trait]
@@ -29,10 +29,15 @@ use ecstore::bucket::metadata_sys;
use ecstore::bucket::policy_sys::PolicySys;
use ecstore::bucket::tagging::decode_tags;
use ecstore::bucket::tagging::encode_tags;
use ecstore::bucket::utils::serialize;
use ecstore::bucket::versioning_sys::BucketVersioningSys;
use ecstore::cmd::bucket_replication::ReplicationStatusType;
use ecstore::cmd::bucket_replication::ReplicationType;
use ecstore::cmd::bucket_replication::get_must_replicate_options;
use ecstore::cmd::bucket_replication::must_replicate;
use ecstore::cmd::bucket_replication::schedule_replication;
use ecstore::compress::MIN_COMPRESSIBLE_SIZE;
use ecstore::compress::is_compressible;
use ecstore::error::StorageError;
use ecstore::new_object_layer_fn;
use ecstore::set_disk::DEFAULT_READ_BUFFER_SIZE;
@@ -46,12 +51,7 @@ use ecstore::store_api::ObjectIO;
use ecstore::store_api::ObjectOptions;
use ecstore::store_api::ObjectToDelete;
use ecstore::store_api::PutObjReader;
use ecstore::store_api::StorageAPI;
// use ecstore::store_api::RESERVED_METADATA_PREFIX;
use ecstore::bucket::utils::serialize;
use ecstore::cmd::bucket_replication::ReplicationStatusType;
use ecstore::cmd::bucket_replication::ReplicationType;
use ecstore::store_api::RESERVED_METADATA_PREFIX_LOWER;
use ecstore::store_api::StorageAPI; // use ecstore::store_api::RESERVED_METADATA_PREFIX;
use futures::pin_mut;
use futures::{Stream, StreamExt};
use http::HeaderMap;
@@ -63,8 +63,13 @@ use policy::policy::Validator;
use policy::policy::action::Action;
use policy::policy::action::S3Action;
use query::instance::make_rustfsms;
use rustfs_filemeta::headers::RESERVED_METADATA_PREFIX_LOWER;
use rustfs_filemeta::headers::{AMZ_DECODED_CONTENT_LENGTH, AMZ_OBJECT_TAGGING};
use rustfs_rio::CompressReader;
use rustfs_rio::HashReader;
use rustfs_rio::Reader;
use rustfs_rio::WarpReader;
use rustfs_utils::CompressionAlgorithm;
use rustfs_utils::path::path_join_buf;
use rustfs_zip::CompressionFormat;
use s3s::S3;
@@ -86,7 +91,6 @@ use tokio_stream::wrappers::ReceiverStream;
use tokio_tar::Archive;
use tokio_util::io::ReaderStream;
use tokio_util::io::StreamReader;
use tracing::debug;
use tracing::error;
use tracing::info;
use tracing::warn;
@@ -179,14 +183,31 @@ impl FS {
fpath = format!("{}/{}", prefix, fpath);
}

let size = f.header().size().unwrap_or_default() as usize;
let mut size = f.header().size().unwrap_or_default() as i64;

println!("Extracted: {}, size {}", fpath, size);

// Wrap the tar entry with BufReader to make it compatible with Reader trait
let reader = Box::new(tokio::io::BufReader::new(f));
let hrd = HashReader::new(reader, size as i64, size as i64, None, false).map_err(ApiError::from)?;
let mut reader = PutObjReader::new(hrd, size);
let mut reader: Box<dyn Reader> = Box::new(WarpReader::new(f));

let mut metadata = HashMap::new();

let actual_size = size;

if is_compressible(&HeaderMap::new(), &fpath) && size > MIN_COMPRESSIBLE_SIZE as i64 {
metadata.insert(
format!("{}compression", RESERVED_METADATA_PREFIX_LOWER),
CompressionAlgorithm::default().to_string(),
);
metadata.insert(format!("{}actual-size", RESERVED_METADATA_PREFIX_LOWER,), size.to_string());

let hrd = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;

reader = Box::new(CompressReader::new(hrd, CompressionAlgorithm::default()));
size = -1;
}

let hrd = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;
let mut reader = PutObjReader::new(hrd);

let _obj_info = store
.put_object(&bucket, &fpath, &mut reader, &ObjectOptions::default())
@@ -319,13 +340,10 @@ impl S3 for FS {
src_info.metadata_only = true;
}

let hrd = HashReader::new(gr.stream, gr.object_info.size as i64, gr.object_info.size as i64, None, false)
.map_err(ApiError::from)?;
let reader = Box::new(WarpReader::new(gr.stream));
let hrd = HashReader::new(reader, gr.object_info.size, gr.object_info.size, None, false).map_err(ApiError::from)?;

src_info.put_object_reader = Some(PutObjReader {
stream: hrd,
content_length: gr.object_info.size as usize,
});
src_info.put_object_reader = Some(PutObjReader::new(hrd));

// check quota
// TODO: src metadada
@@ -536,13 +554,13 @@ impl S3 for FS {
let rs = range.map(|v| match v {
Range::Int { first, last } => HTTPRangeSpec {
is_suffix_length: false,
start: first as usize,
end: last.map(|v| v as usize),
start: first as i64,
end: if let Some(last) = last { last as i64 } else { -1 },
},
Range::Suffix { length } => HTTPRangeSpec {
is_suffix_length: true,
start: length as usize,
end: None,
start: length as i64,
end: -1,
},
});

@@ -583,7 +601,7 @@ impl S3 for FS {

let body = Some(StreamingBlob::wrap(bytes_stream(
ReaderStream::with_capacity(reader.stream, DEFAULT_READ_BUFFER_SIZE),
info.size,
info.size as usize,
)));

let output = GetObjectOutput {
@@ -637,13 +655,13 @@ impl S3 for FS {
let rs = range.map(|v| match v {
Range::Int { first, last } => HTTPRangeSpec {
is_suffix_length: false,
start: first as usize,
end: last.map(|v| v as usize),
start: first as i64,
end: if let Some(last) = last { last as i64 } else { -1 },
},
Range::Suffix { length } => HTTPRangeSpec {
is_suffix_length: true,
start: length as usize,
end: None,
start: length as i64,
end: -1,
},
});

@@ -664,8 +682,8 @@ impl S3 for FS {
// warn!("head_object info {:?}", &info);

let content_type = {
if let Some(content_type) = info.content_type {
match ContentType::from_str(&content_type) {
if let Some(content_type) = &info.content_type {
match ContentType::from_str(content_type) {
Ok(res) => Some(res),
Err(err) => {
error!("parse content-type err {} {:?}", &content_type, err);
@@ -679,10 +697,14 @@ impl S3 for FS {
};
let last_modified = info.mod_time.map(Timestamp::from);

// TODO: range download

let content_length = info.get_actual_size().map_err(ApiError::from)?;

let metadata = info.user_defined;

let output = HeadObjectOutput {
content_length: Some(try_!(i64::try_from(info.size))),
content_length: Some(content_length),
content_type,
last_modified,
e_tag: info.etag,
@@ -806,7 +828,7 @@ impl S3 for FS {
let mut obj = Object {
key: Some(v.name.to_owned()),
last_modified: v.mod_time.map(Timestamp::from),
size: Some(v.size as i64),
size: Some(v.size),
e_tag: v.etag.clone(),
..Default::default()
};
@@ -885,7 +907,7 @@ impl S3 for FS {
ObjectVersion {
key: Some(v.name.to_owned()),
last_modified: v.mod_time.map(Timestamp::from),
size: Some(v.size as i64),
size: Some(v.size),
version_id: v.version_id.map(|v| v.to_string()),
is_latest: Some(v.is_latest),
e_tag: v.etag.clone(),
@@ -926,7 +948,6 @@ impl S3 for FS {
return self.put_object_extract(req).await;
}

info!("put object");
let input = req.input;

if let Some(ref storage_class) = input.storage_class {
@@ -949,7 +970,7 @@ impl S3 for FS {

let Some(body) = body else { return Err(s3_error!(IncompleteBody)) };

let content_length = match content_length {
let mut size = match content_length {
Some(c) => c,
None => {
if let Some(val) = req.headers.get(AMZ_DECODED_CONTENT_LENGTH) {
@@ -964,9 +985,6 @@ impl S3 for FS {
};

let body = StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string()))));
let body = Box::new(tokio::io::BufReader::new(body));
let hrd = HashReader::new(body, content_length as i64, content_length as i64, None, false).map_err(ApiError::from)?;
let mut reader = PutObjReader::new(hrd, content_length as usize);

// let body = Box::new(StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string())))));

@@ -984,10 +1002,32 @@ impl S3 for FS {
metadata.insert(AMZ_OBJECT_TAGGING.to_owned(), tags);
}

let mut reader: Box<dyn Reader> = Box::new(WarpReader::new(body));

let actual_size = size;

if is_compressible(&req.headers, &key) && size > MIN_COMPRESSIBLE_SIZE as i64 {
metadata.insert(
format!("{}compression", RESERVED_METADATA_PREFIX_LOWER),
CompressionAlgorithm::default().to_string(),
);
metadata.insert(format!("{}actual-size", RESERVED_METADATA_PREFIX_LOWER,), size.to_string());

let hrd = HashReader::new(reader, size as i64, size as i64, None, false).map_err(ApiError::from)?;

reader = Box::new(CompressReader::new(hrd, CompressionAlgorithm::default()));
size = -1;
}

// TODO: md5 check
let reader = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;

let mut reader = PutObjReader::new(reader);

let mt = metadata.clone();
let mt2 = metadata.clone();

let opts: ObjectOptions = put_opts(&bucket, &key, version_id, &req.headers, Some(mt))
let mut opts: ObjectOptions = put_opts(&bucket, &key, version_id, &req.headers, Some(mt))
.await
.map_err(ApiError::from)?;
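Read together, the compressed upload path in the hunk above wires the readers in this order (a sketch of the data flow, not additional repository code):

// client body
//   -> WarpReader                                      // adapts the AsyncRead body
//   -> HashReader::new(reader, size, size, ..)         // hashes the uncompressed bytes
//   -> CompressReader(CompressionAlgorithm::default()) // produces the bytes that get stored
//   -> HashReader::new(reader, -1, actual_size, ..)    // size is -1: compressed length unknown up front
//   -> PutObjReader::new(..)                           // handed to store.put_object
// x-rustfs-internal-compression and x-rustfs-internal-actual-size record how to undo this on reads.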
@@ -995,18 +1035,18 @@ impl S3 for FS {
get_must_replicate_options(&mt2, "", ReplicationStatusType::Unknown, ReplicationType::ObjectReplicationType, &opts);

let dsc = must_replicate(&bucket, &key, &repoptions).await;
warn!("dsc {}", &dsc.replicate_any().clone());
// warn!("dsc {}", &dsc.replicate_any().clone());
if dsc.replicate_any() {
let k = format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-timestamp");
let now: DateTime<Utc> = Utc::now();
let formatted_time = now.to_rfc3339();
metadata.insert(k, formatted_time);
let k = format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-status");
metadata.insert(k, dsc.pending_status());
if let Some(metadata) = opts.user_defined.as_mut() {
let k = format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-timestamp");
let now: DateTime<Utc> = Utc::now();
let formatted_time = now.to_rfc3339();
metadata.insert(k, formatted_time);
let k = format!("{}{}", RESERVED_METADATA_PREFIX_LOWER, "replication-status");
metadata.insert(k, dsc.pending_status());
}
}

debug!("put_object opts {:?}", &opts);

let obj_info = store
.put_object(&bucket, &key, &mut reader, &opts)
.await
@@ -1058,6 +1098,13 @@ impl S3 for FS {
metadata.insert(AMZ_OBJECT_TAGGING.to_owned(), tags);
}

if is_compressible(&req.headers, &key) {
metadata.insert(
format!("{}compression", RESERVED_METADATA_PREFIX_LOWER),
CompressionAlgorithm::default().to_string(),
);
}

let opts: ObjectOptions = put_opts(&bucket, &key, version_id, &req.headers, Some(metadata))
.await
.map_err(ApiError::from)?;
@@ -1095,7 +1142,7 @@ impl S3 for FS {
// let upload_id =

let body = body.ok_or_else(|| s3_error!(IncompleteBody))?;
let content_length = match content_length {
let mut size = match content_length {
Some(c) => c,
None => {
if let Some(val) = req.headers.get(AMZ_DECODED_CONTENT_LENGTH) {
@@ -1110,21 +1157,42 @@ impl S3 for FS {
};

let body = StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string()))));
let body = Box::new(tokio::io::BufReader::new(body));
let hrd = HashReader::new(body, content_length as i64, content_length as i64, None, false).map_err(ApiError::from)?;

// mc cp step 4
let mut data = PutObjReader::new(hrd, content_length as usize);

let opts = ObjectOptions::default();

let Some(store) = new_object_layer_fn() else {
return Err(S3Error::with_message(S3ErrorCode::InternalError, "Not init".to_string()));
};

// TODO: hash_reader
let fi = store
.get_multipart_info(&bucket, &key, &upload_id, &opts)
.await
.map_err(ApiError::from)?;

let is_compressible = fi
.user_defined
.contains_key(format!("{}compression", RESERVED_METADATA_PREFIX_LOWER).as_str());

let mut reader: Box<dyn Reader> = Box::new(WarpReader::new(body));

let actual_size = size;

if is_compressible {
let hrd = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;

reader = Box::new(CompressReader::new(hrd, CompressionAlgorithm::default()));
size = -1;
}

// TODO: md5 check
let reader = HashReader::new(reader, size, actual_size, None, false).map_err(ApiError::from)?;

let mut reader = PutObjReader::new(reader);

let info = store
.put_object_part(&bucket, &key, &upload_id, part_id, &mut data, &opts)
.put_object_part(&bucket, &key, &upload_id, part_id, &mut reader, &opts)
.await
.map_err(ApiError::from)?;
@@ -108,7 +108,7 @@ impl ObjectStore for EcObjectStore {
let meta = ObjectMeta {
location: location.clone(),
last_modified: Utc::now(),
size: reader.object_info.size,
size: reader.object_info.size as usize,
e_tag: reader.object_info.etag,
version: None,
};
@@ -121,7 +121,7 @@ impl ObjectStore for EcObjectStore {
ConvertStream::new(reader.stream, self.delimiter.clone()),
DEFAULT_READ_BUFFER_SIZE,
),
reader.object_info.size,
reader.object_info.size as usize,
)
.boxed(),
)
@@ -129,7 +129,7 @@ impl ObjectStore for EcObjectStore {
object_store::GetResultPayload::Stream(
bytes_stream(
ReaderStream::with_capacity(reader.stream, DEFAULT_READ_BUFFER_SIZE),
reader.object_info.size,
reader.object_info.size as usize,
)
.boxed(),
)
@@ -137,7 +137,7 @@ impl ObjectStore for EcObjectStore {
Ok(GetResult {
payload,
meta,
range: 0..reader.object_info.size,
range: 0..reader.object_info.size as usize,
attributes,
})
}
@@ -161,7 +161,7 @@ impl ObjectStore for EcObjectStore {
Ok(ObjectMeta {
location: location.clone(),
last_modified: Utc::now(),
size: info.size,
size: info.size as usize,
e_tag: info.etag,
version: None,
})
@@ -8,4 +8,5 @@ RUSTFS_CONSOLE_ADDRESS=":7001"
RUST_LOG=warn
RUSTFS_OBS_LOG_DIRECTORY="/var/logs/rustfs/"
RUSTFS_NS_SCANNER_INTERVAL=60
RUSTFS_SKIP_BACKGROUND_TASK=true
#RUSTFS_SKIP_BACKGROUND_TASK=true
RUSTFS_COMPRESSION_ENABLED=true
@@ -19,7 +19,7 @@ mkdir -p ./target/volume/test{0..4}

if [ -z "$RUST_LOG" ]; then
export RUST_BACKTRACE=1
export RUST_LOG="rustfs=debug,ecstore=debug,s3s=debug,iam=debug"
export RUST_LOG="rustfs=debug,ecstore=debug,s3s=debug,iam=debug"
fi

# export RUSTFS_ERASURE_SET_DRIVE_COUNT=5
@@ -72,6 +72,11 @@ export OTEL_INSTRUMENTATION_VERSION="0.1.1"
export OTEL_INSTRUMENTATION_SCHEMA_URL="https://opentelemetry.io/schemas/1.31.0"
export OTEL_INSTRUMENTATION_ATTRIBUTES="env=production"

export RUSTFS_NS_SCANNER_INTERVAL=60 # object scanner interval, in seconds
# export RUSTFS_SKIP_BACKGROUND_TASK=true

export RUSTFS_COMPRESSION_ENABLED=true # enable object compression

# event notification configuration
#export RUSTFS_EVENT_CONFIG="./deploy/config/event.example.toml"