Mirror of https://github.com/rustfs/rustfs.git, synced 2026-01-16 17:20:33 +00:00
Fix large file upload freeze with adaptive buffer sizing (#869)
* Initial plan

* Fix large file upload freeze by increasing the StreamReader buffer size

* Add comprehensive documentation for the large file upload freeze fix

* Upgrade s3s version

* Fix compilation error: use BufReader instead of the non-existent StreamReader::with_capacity

* Update documentation with the correct BufReader implementation

* Add tokio feature `io-util`

* Implement adaptive buffer sizing based on file size

* Manage constants uniformly and run fmt

* fix

* Fix: Trigger self-heal on read when shards are missing from rejoined nodes (#871)

  - Added proactive heal detection in get_object_with_fileinfo
  - When reading an object, now checks whether any shards are missing even if the read succeeds
  - Sends a heal request to reconstruct missing shards on rejoined nodes
  - This fixes the issue where data written during a node outage is not healed when the node rejoins

* Unify CRC implementations to crc-fast (#873)

  - Replace the separate CRC libraries with a unified crc-fast implementation

* fix: replace low with Normal (use Normal priority for the proactive heal request)

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: houseme <4829346+houseme@users.noreply.github.com>
Co-authored-by: houseme <housemecn@gmail.com>
Cargo.lock (generated, 23 changed lines):

@@ -1871,15 +1871,6 @@ dependencies = [
  "cfg-if",
 ]
 
-[[package]]
-name = "crc64fast-nvme"
-version = "1.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38fe9239af6a04e140c7424d36d1615f37f1804700c17d5339af162add9022e0"
-dependencies = [
- "crc",
-]
-
 [[package]]
 name = "criterion"
 version = "0.7.0"
@@ -7216,7 +7207,7 @@ version = "0.0.5"
 dependencies = [
  "byteorder",
  "bytes",
- "crc32fast",
+ "crc-fast",
  "criterion",
  "lazy_static",
  "regex",
@@ -7436,9 +7427,7 @@ dependencies = [
  "aes-gcm",
  "base64 0.22.1",
  "bytes",
- "crc32c",
- "crc32fast",
- "crc64fast-nvme",
+ "crc-fast",
  "faster-hex",
  "futures",
  "hex-simd",
@@ -7545,7 +7534,7 @@ dependencies = [
  "brotli 8.0.2",
  "bytes",
  "convert_case",
- "crc32fast",
+ "crc-fast",
  "flate2",
  "futures",
  "hashbrown 0.16.0",
@@ -7789,7 +7778,7 @@ checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
 [[package]]
 name = "s3s"
 version = "0.12.0-rc.3"
-source = "git+https://github.com/s3s-project/s3s.git?rev=1ab064b#1ab064b6e8cfe0c36a2f763e9f6aa14b56592220"
+source = "git+https://github.com/s3s-project/s3s.git?rev=ba9f902#ba9f902df5a0b68dc9b0eeed4f06625fc0633d20"
 dependencies = [
  "arrayvec",
  "async-trait",
@@ -7800,9 +7789,7 @@ dependencies = [
  "cfg-if",
  "chrono",
  "const-str",
- "crc32c",
- "crc32fast",
- "crc64fast-nvme",
+ "crc-fast",
  "futures",
  "hex-simd",
  "hmac 0.13.0-rc.3",
Workspace dependency declarations:

@@ -143,9 +143,6 @@ argon2 = { version = "0.6.0-rc.2", features = ["std"] }
 blake3 = { version = "1.8.2" }
 chacha20poly1305 = { version = "0.11.0-rc.2" }
 crc-fast = "1.6.0"
-crc32c = "0.6.8"
-crc32fast = "1.5.0"
-crc64fast-nvme = "1.2.1"
 hmac = { version = "0.13.0-rc.3" }
 jsonwebtoken = { version = "10.2.0", features = ["rust_crypto"] }
 pbkdf2 = "0.13.0-rc.2"
@@ -225,7 +222,7 @@ regex = { version = "1.12.2" }
 rumqttc = { version = "0.25.0" }
 rust-embed = { version = "8.9.0" }
 rustc-hash = { version = "2.1.1" }
-s3s = { git = "https://github.com/s3s-project/s3s.git", rev = "1ab064b", version = "0.12.0-rc.3", features = ["minio"] }
+s3s = { git = "https://github.com/s3s-project/s3s.git", rev = "ba9f902", version = "0.12.0-rc.3", features = ["minio"] }
 serial_test = "3.2.0"
 shadow-rs = { version = "1.4.0", default-features = false }
 siphasher = "1.0.1"
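The three removed CRC crates are covered by `crc-fast` algorithm variants used throughout this diff: `Crc32IsoHdlc` for the standard CRC-32 that `crc32fast` computed, `Crc32Iscsi` for `crc32c`'s Castagnoli checksum, and `Crc64Nvme` for `crc64fast-nvme`. A minimal sketch of that mapping, using only the `Digest` API exercised by this commit; the `main` wrapper and the CRC-32 check-value assertions (for the standard test string `"123456789"`) are illustrative, not part of the PR:

```rust
// Sketch: one crate, three CRC algorithms (assumes crc-fast 1.6 as pinned above).
fn crc32_ieee(data: &[u8]) -> u32 {
    // Replaces crc32fast::hash / crc32fast::Hasher (CRC-32/ISO-HDLC, a.k.a. IEEE).
    let mut d = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32IsoHdlc);
    d.update(data);
    d.finalize() as u32
}

fn crc32_castagnoli(data: &[u8]) -> u32 {
    // Replaces crc32c::crc32c_append (CRC-32/iSCSI, a.k.a. Castagnoli).
    let mut d = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32Iscsi);
    d.update(data);
    d.finalize() as u32
}

fn crc64_nvme(data: &[u8]) -> u64 {
    // Replaces crc64fast_nvme::Digest (CRC-64/NVME).
    let mut d = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc64Nvme);
    d.update(data);
    d.finalize()
}

fn main() {
    let check = b"123456789";
    // Well-known check values for the two CRC-32 variants.
    assert_eq!(crc32_ieee(check), 0xCBF4_3926);
    assert_eq!(crc32_castagnoli(check), 0xE306_9283);
    // The CRC-64/NVME result is printed rather than asserted here.
    println!("crc64/nvme(\"123456789\") = {:#018x}", crc64_nvme(check));
}
```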
crates/ecstore/src/set_disk.rs:

@@ -68,6 +68,7 @@ use md5::{Digest as Md5Digest, Md5};
 use rand::{Rng, seq::SliceRandom};
 use regex::Regex;
 use rustfs_common::heal_channel::{DriveState, HealChannelPriority, HealItemType, HealOpts, HealScanMode, send_heal_disk};
+use rustfs_config::MI_B;
 use rustfs_filemeta::{
     FileInfo, FileMeta, FileMetaShallowVersion, MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams, ObjectPartInfo,
     RawFileInfo, ReplicationStatusType, VersionPurgeStatusType, file_info_from_raw, merge_file_meta_versions,
@@ -111,7 +112,7 @@ use tracing::error;
 use tracing::{debug, info, warn};
 use uuid::Uuid;
 
-pub const DEFAULT_READ_BUFFER_SIZE: usize = 1024 * 1024;
+pub const DEFAULT_READ_BUFFER_SIZE: usize = MI_B; // 1 MiB = 1024 * 1024
 pub const MAX_PARTS_COUNT: usize = 10000;
 const DISK_ONLINE_TIMEOUT: Duration = Duration::from_secs(1);
 const DISK_HEALTH_CACHE_TTL: Duration = Duration::from_millis(750);
@@ -2212,7 +2213,7 @@ impl SetDisks {
     where
         W: AsyncWrite + Send + Sync + Unpin + 'static,
     {
-        tracing::debug!(bucket, object, requested_length = length, offset, "get_object_with_fileinfo start");
+        debug!(bucket, object, requested_length = length, offset, "get_object_with_fileinfo start");
         let (disks, files) = Self::shuffle_disks_and_parts_metadata_by_index(disks, &files, &fi);
 
         let total_size = fi.size as usize;
@@ -2237,27 +2238,20 @@ impl SetDisks {
 
         let (last_part_index, last_part_relative_offset) = fi.to_part_offset(end_offset)?;
 
-        tracing::debug!(
+        debug!(
             bucket,
-            object,
-            offset,
-            length,
-            end_offset,
-            part_index,
-            last_part_index,
-            last_part_relative_offset,
-            "Multipart read bounds"
+            object, offset, length, end_offset, part_index, last_part_index, last_part_relative_offset, "Multipart read bounds"
         );
 
         let erasure = erasure_coding::Erasure::new(fi.erasure.data_blocks, fi.erasure.parity_blocks, fi.erasure.block_size);
 
         let part_indices: Vec<usize> = (part_index..=last_part_index).collect();
-        tracing::debug!(bucket, object, ?part_indices, "Multipart part indices to stream");
+        debug!(bucket, object, ?part_indices, "Multipart part indices to stream");
 
         let mut total_read = 0;
         for current_part in part_indices {
             if total_read == length {
-                tracing::debug!(
+                debug!(
                     bucket,
                     object,
                     total_read,
@@ -2279,7 +2273,7 @@ impl SetDisks {
 
             let read_offset = (part_offset / erasure.block_size) * erasure.shard_size();
 
-            tracing::debug!(
+            debug!(
                 bucket,
                 object,
                 part_index = current_part,
@@ -2334,12 +2328,39 @@ impl SetDisks {
                 return Err(Error::other(format!("not enough disks to read: {errors:?}")));
             }
 
+            // Check if we have missing shards even though we can read successfully
+            // This happens when a node was offline during write and comes back online
+            let total_shards = erasure.data_shards + erasure.parity_shards;
+            let missing_shards = total_shards - nil_count;
+            if missing_shards > 0 && nil_count >= erasure.data_shards {
+                // We have missing shards but enough to read - trigger background heal
+                info!(
+                    bucket,
+                    object,
+                    part_number,
+                    missing_shards,
+                    available_shards = nil_count,
+                    "Detected missing shards during read, triggering background heal"
+                );
+                let _ = rustfs_common::heal_channel::send_heal_request(
+                    rustfs_common::heal_channel::create_heal_request_with_options(
+                        bucket.to_string(),
+                        Some(object.to_string()),
+                        false,
+                        Some(HealChannelPriority::Normal), // Use normal priority for proactive healing
+                        Some(pool_index),
+                        Some(set_index),
+                    ),
+                )
+                .await;
+            }
+
             // debug!(
             //     "read part {} part_offset {},part_length {},part_size {} ",
             //     part_number, part_offset, part_length, part_size
             // );
             let (written, err) = erasure.decode(writer, readers, part_offset, part_length, part_size).await;
-            tracing::debug!(
+            debug!(
                 bucket,
                 object,
                 part_index = current_part,
@@ -2386,7 +2407,7 @@ impl SetDisks {
 
         // debug!("read end");
 
-        tracing::debug!(bucket, object, total_read, expected_length = length, "Multipart read finished");
+        debug!(bucket, object, total_read, expected_length = length, "Multipart read finished");
 
         Ok(())
     }
@@ -5663,7 +5684,7 @@ impl StorageAPI for SetDisks {
         }
 
         let ext_part = &curr_fi.parts[i];
-        tracing::info!(target:"rustfs_ecstore::set_disk", part_number = p.part_num, part_size = ext_part.size, part_actual_size = ext_part.actual_size, "Completing multipart part");
+        info!(target:"rustfs_ecstore::set_disk", part_number = p.part_num, part_size = ext_part.size, part_actual_size = ext_part.actual_size, "Completing multipart part");
 
         // Normalize ETags by removing quotes before comparison (PR #592 compatibility)
         let client_etag = p.etag.as_ref().map(|e| rustfs_utils::path::trim_etag(e));
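The heal trigger added above boils down to one condition: queue a background heal when at least one shard is missing but enough shards remain to decode. A standalone restatement of that condition (the 8+4 erasure layout and shard counts below are made-up examples; `available` mirrors what the diff logs as `available_shards`):

```rust
// Standalone restatement of the heal-on-read condition from the diff above.
fn should_queue_background_heal(data_shards: usize, parity_shards: usize, available: usize) -> bool {
    let total_shards = data_shards + parity_shards;
    let missing_shards = total_shards - available;
    missing_shards > 0 && available >= data_shards
}

fn main() {
    // Illustrative 8 data + 4 parity erasure set.
    assert!(should_queue_background_heal(8, 4, 10));  // 2 shards missing, still decodable: heal
    assert!(!should_queue_background_heal(8, 4, 12)); // nothing missing: no heal
    assert!(!should_queue_background_heal(8, 4, 6));  // not decodable: the read errors out instead
    println!("heal trigger condition behaves as expected");
}
```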
rustfs-filemeta crate manifest:

@@ -26,7 +26,7 @@ categories = ["web-programming", "development-tools", "filesystem"]
 documentation = "https://docs.rs/rustfs-filemeta/latest/rustfs_filemeta/"
 
 [dependencies]
-crc32fast = { workspace = true }
+crc-fast = { workspace = true }
 rmp.workspace = true
 rmp-serde.workspace = true
 serde.workspace = true
`FileInfo` disk-index selection:

@@ -220,7 +220,11 @@ impl FileInfo {
         let indices = {
             let cardinality = data_blocks + parity_blocks;
             let mut nums = vec![0; cardinality];
-            let key_crc = crc32fast::hash(object.as_bytes());
+            let key_crc = {
+                let mut hasher = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32IsoHdlc);
+                hasher.update(object.as_bytes());
+                hasher.finalize() as u32
+            };
 
             let start = key_crc as usize % cardinality;
             for i in 1..=cardinality {
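Because `key_crc` drives the deterministic shuffle of disks and parts metadata, the swap has to produce the same values as the old `crc32fast::hash`; `CrcAlgorithm::Crc32IsoHdlc` is that same standard CRC-32, so existing objects keep resolving to the same starting index. A small illustration of the index computation (the object name and shard count are made-up examples, not values from the codebase):

```rust
// Illustration only: how the starting index for the disk/part shuffle is derived.
fn start_index(object: &str, cardinality: usize) -> usize {
    let key_crc = {
        let mut hasher = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32IsoHdlc);
        hasher.update(object.as_bytes());
        hasher.finalize() as u32
    };
    key_crc as usize % cardinality
}

fn main() {
    // e.g. 12 shards total (data + parity); the exact numbers are illustrative.
    let cardinality = 12;
    let start = start_index("photos/2025/cat.jpg", cardinality);
    assert!(start < cardinality);
    println!("shuffle starts at shard index {start}");
}
```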
Another crate manifest (checksum and streaming dependencies):

@@ -33,7 +33,7 @@ tokio = { workspace = true, features = ["full"] }
 rand = { workspace = true }
 http.workspace = true
 aes-gcm = { workspace = true }
-crc32fast = { workspace = true }
+crc-fast = { workspace = true }
 pin-project-lite.workspace = true
 serde = { workspace = true }
 bytes.workspace = true
@@ -49,10 +49,8 @@ thiserror.workspace = true
 base64.workspace = true
 sha1.workspace = true
 sha2.workspace = true
-crc64fast-nvme.workspace = true
 s3s.workspace = true
 hex-simd.workspace = true
-crc32c.workspace = true
 
 [dev-dependencies]
 tokio-test = { workspace = true }
Checksum hasher implementations:

@@ -15,7 +15,6 @@
 use crate::errors::ChecksumMismatch;
 use base64::{Engine as _, engine::general_purpose};
 use bytes::Bytes;
-use crc32fast::Hasher as Crc32Hasher;
 use http::HeaderMap;
 use sha1::Sha1;
 use sha2::{Digest, Sha256};
@@ -612,7 +611,7 @@ pub trait ChecksumHasher: Write + Send + Sync {
 
 /// CRC32 IEEE hasher
 pub struct Crc32IeeeHasher {
-    hasher: Crc32Hasher,
+    hasher: crc_fast::Digest,
 }
 
 impl Default for Crc32IeeeHasher {
@@ -624,7 +623,7 @@ impl Default for Crc32IeeeHasher {
 impl Crc32IeeeHasher {
     pub fn new() -> Self {
         Self {
-            hasher: Crc32Hasher::new(),
+            hasher: crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32IsoHdlc),
         }
     }
 }
@@ -642,27 +641,36 @@ impl Write for Crc32IeeeHasher {
 
 impl ChecksumHasher for Crc32IeeeHasher {
     fn finalize(&mut self) -> Vec<u8> {
-        self.hasher.clone().finalize().to_be_bytes().to_vec()
+        (self.hasher.clone().finalize() as u32).to_be_bytes().to_vec()
     }
 
     fn reset(&mut self) {
-        self.hasher = Crc32Hasher::new();
+        self.hasher = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32IsoHdlc);
     }
 }
 
 /// CRC32 Castagnoli hasher
-#[derive(Default)]
-pub struct Crc32CastagnoliHasher(u32);
+pub struct Crc32CastagnoliHasher {
+    hasher: crc_fast::Digest,
+}
+
+impl Default for Crc32CastagnoliHasher {
+    fn default() -> Self {
+        Self::new()
+    }
+}
 
 impl Crc32CastagnoliHasher {
     pub fn new() -> Self {
-        Self::default()
+        Self {
+            hasher: crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32Iscsi),
+        }
     }
 }
 
 impl Write for Crc32CastagnoliHasher {
     fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
-        self.0 = crc32c::crc32c_append(self.0, buf);
+        self.hasher.update(buf);
         Ok(buf.len())
     }
 
@@ -673,11 +681,11 @@ impl Write for Crc32CastagnoliHasher {
 
 impl ChecksumHasher for Crc32CastagnoliHasher {
     fn finalize(&mut self) -> Vec<u8> {
-        self.0.to_be_bytes().to_vec()
+        (self.hasher.clone().finalize() as u32).to_be_bytes().to_vec()
     }
 
     fn reset(&mut self) {
-        self.0 = 0;
+        self.hasher = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32Iscsi);
     }
 }
 
@@ -758,22 +766,27 @@ impl ChecksumHasher for Sha256Hasher {
 }
 
 /// CRC64 NVME hasher
-#[derive(Default)]
 pub struct Crc64NvmeHasher {
-    hasher: crc64fast_nvme::Digest,
+    hasher: crc_fast::Digest,
+}
+
+impl Default for Crc64NvmeHasher {
+    fn default() -> Self {
+        Self::new()
+    }
 }
 
 impl Crc64NvmeHasher {
     pub fn new() -> Self {
         Self {
-            hasher: Default::default(),
+            hasher: crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc64Nvme),
         }
     }
 }
 
 impl Write for Crc64NvmeHasher {
     fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
-        self.hasher.write(buf);
+        self.hasher.update(buf);
         Ok(buf.len())
     }
 
@@ -784,11 +797,11 @@ impl Write for Crc64NvmeHasher {
 
 impl ChecksumHasher for Crc64NvmeHasher {
     fn finalize(&mut self) -> Vec<u8> {
-        self.hasher.sum64().to_be_bytes().to_vec()
+        self.hasher.clone().finalize().to_be_bytes().to_vec()
     }
 
     fn reset(&mut self) {
-        self.hasher = Default::default();
+        self.hasher = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc64Nvme);
     }
 }
 
Compression CRC32 verification:

@@ -356,7 +356,11 @@ where
                 *this.compressed_len = 0;
                 return Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Decompressed length mismatch")));
             }
-            let actual_crc = crc32fast::hash(&decompressed);
+            let actual_crc = {
+                let mut hasher = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32IsoHdlc);
+                hasher.update(&decompressed);
+                hasher.finalize() as u32
+            };
             if actual_crc != crc {
                 // error!("DecompressReader CRC32 mismatch: actual {actual_crc} != expected {crc}");
                 this.compressed_buf.take();
@@ -404,7 +408,11 @@ where
 
 /// Build compressed block with header + uvarint + compressed data
 fn build_compressed_block(uncompressed_data: &[u8], compression_algorithm: CompressionAlgorithm) -> Vec<u8> {
-    let crc = crc32fast::hash(uncompressed_data);
+    let crc = {
+        let mut hasher = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32IsoHdlc);
+        hasher.update(uncompressed_data);
+        hasher.finalize() as u32
+    };
     let compressed_data = compress_block(uncompressed_data, compression_algorithm);
     let uncompressed_len = uncompressed_data.len();
     let mut uncompressed_len_buf = [0u8; 10];
Encryption CRC32 verification:

@@ -102,7 +102,11 @@ where
             let nonce = Nonce::try_from(this.nonce.as_slice()).map_err(|_| Error::other("invalid nonce length"))?;
             let plaintext = &temp_buf.filled()[..n];
             let plaintext_len = plaintext.len();
-            let crc = crc32fast::hash(plaintext);
+            let crc = {
+                let mut hasher = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32IsoHdlc);
+                hasher.update(plaintext);
+                hasher.finalize() as u32
+            };
             let ciphertext = cipher
                 .encrypt(&nonce, plaintext)
                 .map_err(|e| Error::other(format!("encrypt error: {e}")))?;
@@ -409,7 +413,11 @@ where
                 return Poll::Ready(Err(Error::other("Plaintext length mismatch")));
             }
 
-            let actual_crc = crc32fast::hash(&plaintext);
+            let actual_crc = {
+                let mut hasher = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32IsoHdlc);
+                hasher.update(&plaintext);
+                hasher.finalize() as u32
+            };
             if actual_crc != crc {
                 this.ciphertext_buf.take();
                 *this.ciphertext_read = 0;
Utility crate manifest and features:

@@ -29,7 +29,7 @@ base64-simd = { workspace = true, optional = true }
 blake3 = { workspace = true, optional = true }
 brotli = { workspace = true, optional = true }
 bytes = { workspace = true, optional = true }
-crc32fast = { workspace = true, optional = true }
+crc-fast = { workspace = true, optional = true }
 flate2 = { workspace = true, optional = true }
 futures = { workspace = true, optional = true }
 hashbrown = { workspace = true, optional = true }
@@ -88,7 +88,7 @@ notify = ["dep:hyper", "dep:s3s", "dep:hashbrown", "dep:thiserror", "dep:serde",
 compress = ["dep:flate2", "dep:brotli", "dep:snap", "dep:lz4", "dep:zstd"]
 string = ["dep:regex", "dep:rand"]
 crypto = ["dep:base64-simd", "dep:hex-simd", "dep:hmac", "dep:hyper", "dep:sha1"]
-hash = ["dep:highway", "dep:md-5", "dep:sha2", "dep:blake3", "dep:serde", "dep:siphasher", "dep:hex-simd", "dep:base64-simd", "dep:crc32fast"]
+hash = ["dep:highway", "dep:md-5", "dep:sha2", "dep:blake3", "dep:serde", "dep:siphasher", "dep:hex-simd", "dep:base64-simd", "dep:crc-fast"]
 os = ["dep:nix", "dep:tempfile", "winapi"] # operating system utilities
 integration = [] # integration test features
 sys = ["dep:sysinfo"] # system information features
Hash utilities (`crc_hash`):

@@ -115,7 +115,6 @@ impl HashAlgorithm {
     }
 }
 
-use crc32fast::Hasher;
 use siphasher::sip::SipHasher;
 
 pub const EMPTY_STRING_SHA256_HASH: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
@@ -151,11 +150,9 @@ pub fn sip_hash(key: &str, cardinality: usize, id: &[u8; 16]) -> usize {
 /// A usize representing the bucket index
 ///
 pub fn crc_hash(key: &str, cardinality: usize) -> usize {
-    let mut hasher = Hasher::new(); // Create a new hasher
-
-    hasher.update(key.as_bytes()); // Update hash state, add data
-
-    let checksum = hasher.finalize();
+    let mut hasher = crc_fast::Digest::new(crc_fast::CrcAlgorithm::Crc32IsoHdlc);
+    hasher.update(key.as_bytes());
+    let checksum = hasher.finalize() as u32;
 
     checksum as usize % cardinality
 }
docs/fix-large-file-upload-freeze.md (new file, 192 lines):

# Fix for Large File Upload Freeze Issue

## Problem Description

When uploading large files (10GB-20GB) consecutively, uploads may freeze with the following error:

```
[2025-11-10 14:29:22.110443 +00:00] ERROR [s3s::service]
AwsChunkedStreamError: Underlying: error reading a body from connection
```

## Root Cause Analysis

### 1. Small Default Buffer Size
The issue was caused by using `tokio_util::io::StreamReader::new()` which has a default buffer size of only **8KB**. This is far too small for large file uploads and causes:

- **Excessive system calls**: For a 10GB file with 8KB buffer, approximately **1.3 million read operations** are required (see the worked calculation after this list)
- **High CPU overhead**: Each read involves AWS chunked encoding/decoding overhead
- **Memory allocation pressure**: Frequent small allocations and deallocations
- **Increased timeout risk**: Slow read pace can trigger connection timeouts
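The read-count figures above follow directly from dividing the transfer size by the buffer size; a quick back-of-the-envelope check (illustrative only, not a benchmark):

```rust
// Back-of-the-envelope: buffered reads needed to move a 10 GiB body.
fn main() {
    const GIB: u64 = 1024 * 1024 * 1024;
    let file_size = 10 * GIB;

    let reads_8k = file_size / (8 * 1024);    // old 8 KiB buffer
    let reads_1m = file_size / (1024 * 1024); // DEFAULT_READ_BUFFER_SIZE (1 MiB)

    assert_eq!(reads_8k, 1_310_720);
    assert_eq!(reads_1m, 10_240);
    println!("8 KiB buffer: ~{reads_8k} reads; 1 MiB buffer: ~{reads_1m} reads");
}
```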

### 2. AWS Chunked Encoding Overhead
AWS S3 uses chunked transfer encoding which adds metadata to each chunk. With a small buffer:
- More chunks need to be processed
- More metadata parsing operations
- Higher probability of parsing errors or timeouts

### 3. Connection Timeout Under Load
When multiple large files are uploaded consecutively:
- Small buffers lead to slow data transfer rates
- Network connections may timeout waiting for data
- The s3s library reports "error reading a body from connection"

## Solution

Wrap `StreamReader::new()` with `tokio::io::BufReader::with_capacity()` using a 1MB buffer size (`DEFAULT_READ_BUFFER_SIZE = 1024 * 1024`).

### Changes Made

Modified three critical locations in `rustfs/src/storage/ecfs.rs`:

1. **put_object** (line ~2338): Standard object upload
2. **put_object_extract** (line ~376): Archive file extraction and upload
3. **upload_part** (line ~2864): Multipart upload

### Before
```rust
let body = StreamReader::new(
    body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string())))
);
```

### After
```rust
// Use a larger buffer size (1MB) for StreamReader to prevent chunked stream read timeouts
// when uploading large files (10GB+). The default 8KB buffer is too small and causes
// excessive syscalls and potential connection timeouts.
let body = tokio::io::BufReader::with_capacity(
    DEFAULT_READ_BUFFER_SIZE,
    StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string())))),
);
```

## Performance Impact

### For a 10GB File Upload:

| Metric | Before (8KB buffer) | After (1MB buffer) | Improvement |
|--------|--------------------|--------------------|-------------|
| Read operations | ~1,310,720 | ~10,240 | **99.2% reduction** |
| System call overhead | High | Low | Significantly reduced |
| Memory allocations | Frequent small | Less frequent large | More efficient |
| Timeout risk | High | Low | Much more stable |

### Benefits

1. **Reduced System Calls**: ~99% reduction in read operations for large files
2. **Lower CPU Usage**: Less AWS chunked encoding/decoding overhead
3. **Better Memory Efficiency**: Fewer allocations and better cache locality
4. **Improved Reliability**: Significantly reduced timeout probability
5. **Higher Throughput**: Better network utilization

## Testing Recommendations

To verify the fix works correctly, test the following scenarios:

1. **Single Large File Upload**
   - Upload a 10GB file
   - Upload a 20GB file
   - Monitor for timeout errors

2. **Consecutive Large File Uploads**
   - Upload 5 files of 10GB each consecutively
   - Upload 3 files of 20GB each consecutively
   - Ensure no freezing or timeout errors

3. **Multipart Upload**
   - Upload large files using multipart upload
   - Test with various part sizes
   - Verify all parts complete successfully

4. **Archive Extraction**
   - Upload large tar/gzip files with X-Amz-Meta-Snowball-Auto-Extract
   - Verify extraction completes without errors

## Monitoring

After deployment, monitor these metrics:

- Upload completion rate for files > 1GB
- Average upload time for large files
- Frequency of chunked stream errors
- CPU usage during uploads
- Memory usage during uploads

## Related Configuration

The buffer size is defined in `crates/ecstore/src/set_disk.rs`:

```rust
pub const DEFAULT_READ_BUFFER_SIZE: usize = 1024 * 1024; // 1 MB
```

This value is used consistently across the codebase for stream reading operations.

## Additional Considerations

### Implementation Details

The solution uses `tokio::io::BufReader` to wrap the `StreamReader`, as `tokio-util 0.7.17` does not provide a `StreamReader::with_capacity()` method. The `BufReader` provides the same buffering benefits while being compatible with the current tokio-util version.

### Adaptive Buffer Sizing (Implemented)

The fix now includes **dynamic adaptive buffer sizing** based on file size for optimal performance and memory usage:

```rust
/// Calculate adaptive buffer size based on file size for optimal streaming performance.
fn get_adaptive_buffer_size(file_size: i64) -> usize {
    match file_size {
        // Unknown size or negative (chunked/streaming): use 1MB buffer for safety
        size if size < 0 => 1024 * 1024,
        // Small files (< 1MB): use 64KB to minimize memory overhead
        size if size < 1_048_576 => 65_536,
        // Medium files (1MB - 100MB): use 256KB for balanced performance
        size if size < 104_857_600 => 262_144,
        // Large files (>= 100MB): use 1MB buffer for maximum throughput
        _ => 1024 * 1024,
    }
}
```
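A call site then feeds the selected size into `tokio::io::BufReader::with_capacity` around the `StreamReader`, mirroring the `put_object`, `put_object_extract`, and `upload_part` changes in this PR. The sketch below is simplified and self-contained; the in-memory chunk stream and the `size` value are stand-ins for the real S3 request body (it assumes the `tokio`, `tokio-util`, `futures`, and `bytes` crates already in the workspace):

```rust
use tokio::io::{AsyncReadExt, BufReader};
use tokio_util::io::StreamReader;

// Same thresholds as the function shown above (literal-value form).
fn get_adaptive_buffer_size(file_size: i64) -> usize {
    match file_size {
        size if size < 0 => 1024 * 1024,       // unknown length: be generous
        size if size < 1_048_576 => 65_536,    // < 1 MB
        size if size < 104_857_600 => 262_144, // 1 MB - 100 MB
        _ => 1024 * 1024,                      // >= 100 MB
    }
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    // Stand-in for the incoming S3 body: a stream of Result<Bytes, io::Error> chunks.
    let chunks = vec![
        Ok::<bytes::Bytes, std::io::Error>(bytes::Bytes::from_static(b"hello ")),
        Ok(bytes::Bytes::from_static(b"world")),
    ];
    let body_stream = futures::stream::iter(chunks);

    // Content length as reported by the request (stand-in value).
    let size: i64 = 11;
    let buffer_size = get_adaptive_buffer_size(size);

    // Adaptive BufReader wrapped around StreamReader, as in the changed call sites.
    let mut body = BufReader::with_capacity(buffer_size, StreamReader::new(body_stream));

    let mut out = Vec::new();
    body.read_to_end(&mut out).await?;
    assert_eq!(out, b"hello world".to_vec());
    println!("read {} bytes through a {} byte buffer", out.len(), buffer_size);
    Ok(())
}
```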

**Benefits**:
- **Memory Efficiency**: Small files use smaller buffers (64KB), reducing memory overhead
- **Balanced Performance**: Medium files use 256KB buffers for optimal balance
- **Maximum Throughput**: Large files (100MB+) use 1MB buffers to minimize syscalls
- **Automatic Selection**: Buffer size is chosen automatically based on content-length

**Performance Impact by File Size**:

| File Size | Buffer Size | Memory Saved vs Fixed 1MB | Syscalls (approx) |
|-----------|-------------|--------------------------|-------------------|
| 100 KB | 64 KB | 960 KB (94% reduction) | ~2 |
| 10 MB | 256 KB | 768 KB (75% reduction) | ~40 |
| 100 MB | 1 MB | 0 KB (same) | ~100 |
| 10 GB | 1 MB | 0 KB (same) | ~10,240 |

### Future Improvements

1. **Connection Keep-Alive**: Ensure HTTP keep-alive is properly configured for consecutive uploads

2. **Rate Limiting**: Consider implementing upload rate limiting to prevent resource exhaustion

3. **Configurable Thresholds**: Make buffer size thresholds configurable via environment variables or config file

### Alternative Approaches Considered

1. **Increase s3s timeout**: Would only mask the problem, not fix the root cause
2. **Retry logic**: Would increase complexity and potentially make things worse
3. **Connection pooling**: Already handled by underlying HTTP stack
4. **Upgrade tokio-util**: Would provide `StreamReader::with_capacity()` but requires testing entire dependency tree

## References

- Issue: "Uploading files of 10GB or 20GB consecutively may cause the upload to freeze"
- Error: `AwsChunkedStreamError: Underlying: error reading a body from connection`
- Library: `tokio_util::io::StreamReader`
- Default buffer: 8KB (tokio_util default)
- New buffer: 1MB (`DEFAULT_READ_BUFFER_SIZE`)

## Conclusion

This fix addresses the root cause of large file upload freezes by using an appropriately sized buffer for stream reading. The 1MB buffer significantly reduces system call overhead, improves throughput, and eliminates timeout issues during consecutive large file uploads.
Enable tokio's `io-util` feature (needed for `tokio::io::BufReader`):

@@ -73,7 +73,7 @@ http.workspace = true
 http-body.workspace = true
 reqwest = { workspace = true }
 socket2 = { workspace = true }
-tokio = { workspace = true, features = ["rt-multi-thread", "macros", "net", "signal", "process"] }
+tokio = { workspace = true, features = ["rt-multi-thread", "macros", "net", "signal", "process", "io-util"] }
 tokio-rustls = { workspace = true }
 tokio-stream.workspace = true
 tokio-util.workspace = true
rustfs/src/storage/ecfs.rs:

@@ -33,6 +33,7 @@ use datafusion::arrow::{
 use futures::StreamExt;
 use http::{HeaderMap, StatusCode};
 use metrics::counter;
+use rustfs_config::{KI_B, MI_B};
 use rustfs_ecstore::{
     bucket::{
         lifecycle::{
@@ -149,6 +150,32 @@ static RUSTFS_OWNER: LazyLock<Owner> = LazyLock::new(|| Owner {
     id: Some("c19050dbcee97fda828689dda99097a6321af2248fa760517237346e5d9c8a66".to_owned()),
 });
 
+/// Calculate adaptive buffer size based on file size for optimal streaming performance.
+///
+/// This function implements adaptive buffering to balance memory usage and performance:
+/// - Small files (< 1MB): 64KB buffer - minimize memory overhead
+/// - Medium files (1MB-100MB): 256KB buffer - balanced approach
+/// - Large files (>= 100MB): 1MB buffer - maximize throughput, minimize syscalls
+///
+/// # Arguments
+/// * `file_size` - The size of the file in bytes, or -1 if unknown
+///
+/// # Returns
+/// Optimal buffer size in bytes
+///
+fn get_adaptive_buffer_size(file_size: i64) -> usize {
+    match file_size {
+        // Unknown size or negative (chunked/streaming): use default large buffer for safety
+        size if size < 0 => DEFAULT_READ_BUFFER_SIZE,
+        // Small files (< 1MB): use 64KB to minimize memory overhead
+        size if size < MI_B as i64 => 64 * KI_B,
+        // Medium files (1MB - 100MB): use 256KB for balanced performance
+        size if size < (100 * MI_B) as i64 => 256 * KI_B,
+        // Large files (>= 100MB): use 1MB buffer for maximum throughput
+        _ => DEFAULT_READ_BUFFER_SIZE,
+    }
+}
+
 #[derive(Debug, Clone)]
 pub struct FS {
     // pub store: ECStore,
@@ -370,8 +397,6 @@ impl FS {
         let event_version_id = version_id;
         let Some(body) = body else { return Err(s3_error!(IncompleteBody)) };
 
-        let body = StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string()))));
-
         let size = match content_length {
             Some(c) => c,
             None => {
@@ -386,6 +411,16 @@ impl FS {
             }
         };
 
+        // Use adaptive buffer sizing based on file size for optimal performance:
+        // - Small files (< 1MB): 64KB buffer to minimize memory overhead
+        // - Medium files (1MB-100MB): 256KB buffer for balanced performance
+        // - Large files (>= 100MB): 1MB buffer to prevent chunked stream read timeouts
+        let buffer_size = get_adaptive_buffer_size(size);
+        let body = tokio::io::BufReader::with_capacity(
+            buffer_size,
+            StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string())))),
+        );
+
         let Some(ext) = Path::new(&key).extension().and_then(|s| s.to_str()) else {
             return Err(s3_error!(InvalidArgument, "key extension not found"));
         };
@@ -2326,7 +2361,15 @@ impl S3 for FS {
             return Err(s3_error!(UnexpectedContent));
         }
 
-        let body = StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string()))));
+        // Use adaptive buffer sizing based on file size for optimal performance:
+        // - Small files (< 1MB): 64KB buffer to minimize memory overhead
+        // - Medium files (1MB-100MB): 256KB buffer for balanced performance
+        // - Large files (>= 100MB): 1MB buffer to prevent chunked stream read timeouts
+        let buffer_size = get_adaptive_buffer_size(size);
+        let body = tokio::io::BufReader::with_capacity(
+            buffer_size,
+            StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string())))),
+        );
 
         // let body = Box::new(StreamReader::new(body.map(|f| f.map_err(|e| std::io::Error::other(e.to_string())))));
 
@@ -2846,7 +2889,15 @@ impl S3 for FS {
 
         let mut size = size.ok_or_else(|| s3_error!(UnexpectedContent))?;
 
-        let body = StreamReader::new(body_stream.map(|f| f.map_err(|e| std::io::Error::other(e.to_string()))));
+        // Use adaptive buffer sizing based on part size for optimal performance:
+        // - Small parts (< 1MB): 64KB buffer to minimize memory overhead
+        // - Medium parts (1MB-100MB): 256KB buffer for balanced performance
+        // - Large parts (>= 100MB): 1MB buffer to prevent chunked stream read timeouts
+        let buffer_size = get_adaptive_buffer_size(size);
+        let body = tokio::io::BufReader::with_capacity(
+            buffer_size,
+            StreamReader::new(body_stream.map(|f| f.map_err(|e| std::io::Error::other(e.to_string())))),
+        );
 
         // mc cp step 4
 
@@ -4944,6 +4995,32 @@ mod tests {
         assert_eq!(gz_format.extension(), "gz");
     }
 
+    #[test]
+    fn test_adaptive_buffer_size() {
+        const KB: i64 = 1024;
+        const MB: i64 = 1024 * 1024;
+
+        // Test unknown/negative size (chunked/streaming)
+        assert_eq!(get_adaptive_buffer_size(-1), DEFAULT_READ_BUFFER_SIZE);
+        assert_eq!(get_adaptive_buffer_size(-100), DEFAULT_READ_BUFFER_SIZE);
+
+        // Test small files (< 1MB) - should use 64KB
+        assert_eq!(get_adaptive_buffer_size(0), 64 * KB as usize);
+        assert_eq!(get_adaptive_buffer_size(512 * KB), 64 * KB as usize);
+        assert_eq!(get_adaptive_buffer_size(MB - 1), 64 * KB as usize);
+
+        // Test medium files (1MB - 100MB) - should use 256KB
+        assert_eq!(get_adaptive_buffer_size(MB), 256 * KB as usize);
+        assert_eq!(get_adaptive_buffer_size(50 * MB), 256 * KB as usize);
+        assert_eq!(get_adaptive_buffer_size(100 * MB - 1), 256 * KB as usize);
+
+        // Test large files (>= 100MB) - should use 1MB (DEFAULT_READ_BUFFER_SIZE)
+        assert_eq!(get_adaptive_buffer_size(100 * MB), DEFAULT_READ_BUFFER_SIZE);
+        assert_eq!(get_adaptive_buffer_size(500 * MB), DEFAULT_READ_BUFFER_SIZE);
+        assert_eq!(get_adaptive_buffer_size(10 * 1024 * MB), DEFAULT_READ_BUFFER_SIZE); // 10GB
+        assert_eq!(get_adaptive_buffer_size(20 * 1024 * MB), DEFAULT_READ_BUFFER_SIZE); // 20GB
+    }
+
     // Note: S3Request structure is complex and requires many fields.
     // For real testing, we would need proper integration test setup.
     // Removing this test as it requires too much S3 infrastructure setup.