Merge branch 'main' of github.com:rustfs/s3-rustfs into feature/observability

# Conflicts:
#	Cargo.lock
#	Cargo.toml
This commit is contained in:
houseme
2025-03-18 16:51:25 +08:00
55 changed files with 5054 additions and 3702 deletions

View File

@@ -1,2 +1,4 @@
[target.x86_64-unknown-linux-gnu]
rustflags = ["-Clink-arg=-fuse-ld=lld"]
rustflags = [
"-C", "link-arg=-fuse-ld=bfd"
]

View File

@@ -4,7 +4,7 @@ ENV LANG C.UTF-8
RUN sed -i s@http://.*archive.ubuntu.com@http://repo.huaweicloud.com@g /etc/apt/sources.list
RUN apt-get clean && apt-get update && apt-get install wget git curl unzip gcc pkg-config libssl-dev -y
RUN apt-get clean && apt-get update && apt-get install wget git curl unzip gcc pkg-config libssl-dev lld libdbus-1-dev libwayland-dev libwebkit2gtk-4.1-dev libxdo-dev -y
# install protoc
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v27.0/protoc-27.0-linux-x86_64.zip \

View File

@@ -4,7 +4,7 @@ ENV LANG C.UTF-8
RUN sed -i s@http://.*archive.ubuntu.com@http://repo.huaweicloud.com@g /etc/apt/sources.list
RUN apt-get clean && apt-get update && apt-get install wget git curl unzip gcc pkg-config libssl-dev -y
RUN apt-get clean && apt-get update && apt-get install wget git curl unzip gcc pkg-config libssl-dev lld libdbus-1-dev libwayland-dev libwebkit2gtk-4.1-dev libxdo-dev -y
# install protoc
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v27.0/protoc-27.0-linux-x86_64.zip \

1
.gitignore vendored
View File

@@ -4,6 +4,7 @@
.vscode
/test
/logs
/data
.devcontainer
rustfs/static/*
vendor

824
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -7,8 +7,6 @@ members = [
"common/common", # Shared utilities and data structures
"common/lock", # Distributed locking implementation
"common/protos", # Protocol buffer definitions
"api/admin", # Admin HTTP API endpoints
"reader", # Object reading service
"common/workers", # Worker thread pools and task scheduling
"iam", # Identity and Access Management
"crypto", # Cryptography and security features
@@ -46,7 +44,6 @@ flatbuffers = "24.12.23"
futures = "0.3.31"
futures-util = "0.3.31"
common = { path = "./common/common" }
reader = { path = "./reader" }
hex = "0.4.3"
hyper = "1.6.0"
hyper-util = { version = "0.1.10", features = [
@@ -77,8 +74,8 @@ prost-types = "0.13.4"
protobuf = "3.7"
protos = { path = "./common/protos" }
rand = "0.8.5"
reqwest = { version = "0.12.12", default-features = false, features = ["json", "rustls-tls", "charset", "http2", "macos-system-configuration", "stream"] }
rdkafka = { version = "0.37", features = ["tokio"] }
reqwest = { version = "0.12.12", default-features = false, features = ["rustls-tls", "charset", "http2", "macos-system-configuration", "stream", "json", "blocking"] }
rfd = { version = "0.15.2", default-features = false, features = ["xdg-portal", "tokio"] }
rmp = "0.8.14"
rmp-serde = "1.3.0"
@@ -125,7 +122,6 @@ axum = "0.7.9"
md-5 = "0.10.6"
workers = { path = "./common/workers" }
test-case = "3.3.1"
zip = "2.2.3"
[profile.wasm-dev]
inherits = "dev"

17
Dockerfile Normal file
View File

@@ -0,0 +1,17 @@
FROM alpine:latest
# RUN apk add --no-cache <package-name>
WORKDIR /app
RUN mkdir -p /data/rustfs0 /data/rustfs1 /data/rustfs2 /data/rustfs3
COPY ./target/x86_64-unknown-linux-musl/release/rustfs /app/rustfs
RUN chmod +x /app/rustfs
EXPOSE 9000
EXPOSE 9001
CMD ["/app/rustfs"]

View File

@@ -37,9 +37,9 @@ probe-e2e:
# in target/rockylinux9.3/release/rustfs
BUILD_OS ?= rockylinux9.3
.PHONY: build
build: ROCKYLINUX_BUILD_IMAGE_NAME = $(BUILD_OS):v1
build: ROCKYLINUX_BUILD_IMAGE_NAME = rustfs-$(BUILD_OS):v1
build: ROCKYLINUX_BUILD_CONTAINER_NAME = rustfs-$(BUILD_OS)-build
build: BUILD_CMD = /root/.cargo/bin/cargo build --release --target-dir /root/s3-rustfs/target/$(BUILD_OS)
build: BUILD_CMD = /root/.cargo/bin/cargo build --release --bin rustfs --target-dir /root/s3-rustfs/target/$(BUILD_OS)
build:
$(DOCKER_CLI) build -t $(ROCKYLINUX_BUILD_IMAGE_NAME) -f $(DOCKERFILE_PATH)/Dockerfile.$(BUILD_OS) .
$(DOCKER_CLI) run --rm --name $(ROCKYLINUX_BUILD_CONTAINER_NAME) -v $(shell pwd):/root/s3-rustfs -it $(ROCKYLINUX_BUILD_IMAGE_NAME) $(BUILD_CMD)

View File

@@ -57,3 +57,11 @@
- [ ] 对象压缩
- [ ] STS
- [ ] 分层阿里云、腾讯云、S3远程对接
## 性能优化
- [ ] bitrot impl AsyncRead/AsyncWrite
- [ ] erasure 并发读写
- [ ] 完善删除逻辑, 并发处理,先移动到回收站,空间不足时清空回收站
- [ ] list_object 使用reader传输

View File

@@ -1,21 +0,0 @@
[package]
name = "admin"
edition.workspace = true
license.workspace = true
repository.workspace = true
rust-version.workspace = true
version.workspace = true
[lints]
workspace = true
[dependencies]
axum.workspace = true
mime.workspace = true
serde.workspace = true
serde_json.workspace = true
ecstore = { path = "../../ecstore" }
time = { workspace = true, features = ["serde"] }
tower.workspace = true
futures-util = "0.3.31"
hyper.workspace = true

View File

@@ -1,98 +0,0 @@
use axum::{
body::Body,
http::{header::CONTENT_TYPE, HeaderValue, StatusCode},
response::{IntoResponse, Response},
};
use mime::APPLICATION_JSON;
use serde::Serialize;
#[derive(Serialize, Default)]
#[serde(rename_all = "PascalCase")]
pub struct ErrorResponse {
pub code: String,
pub message: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub key: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub bucket_name: Option<String>,
pub resource: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub region: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub request_id: Option<String>,
pub host_id: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub actual_object_size: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub range_requested: Option<String>,
}
impl IntoResponse for APIError {
fn into_response(self) -> Response {
let code = self.http_status_code;
let err_response = ErrorResponse::from(self);
let json_res = match serde_json::to_vec(&err_response) {
Ok(r) => r,
Err(e) => return (StatusCode::INTERNAL_SERVER_ERROR, format!("{e}")).into_response(),
};
Response::builder()
.status(code)
.header(CONTENT_TYPE, HeaderValue::from_static(APPLICATION_JSON.as_ref()))
.body(Body::from(json_res))
.unwrap_or_else(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("{e}")).into_response())
}
}
#[derive(Default)]
pub struct APIError {
code: String,
description: String,
http_status_code: StatusCode,
object_size: Option<String>,
range_requested: Option<String>,
}
pub enum ErrorCode {
ErrNotImplemented,
ErrServerNotInitialized,
}
impl IntoResponse for ErrorCode {
fn into_response(self) -> Response {
APIError::from(self).into_response()
}
}
impl From<ErrorCode> for APIError {
fn from(value: ErrorCode) -> Self {
use ErrorCode::*;
match value {
ErrNotImplemented => APIError {
code: "NotImplemented".into(),
description: "A header you provided implies functionality that is not implemented.".into(),
http_status_code: StatusCode::NOT_IMPLEMENTED,
..Default::default()
},
ErrServerNotInitialized => APIError {
code: "ServerNotInitialized".into(),
description: "Server not initialized yet, please try again.".into(),
http_status_code: StatusCode::SERVICE_UNAVAILABLE,
..Default::default()
},
}
}
}
impl From<APIError> for ErrorResponse {
fn from(value: APIError) -> Self {
Self {
code: value.code,
message: value.description,
actual_object_size: value.object_size,
range_requested: value.range_requested,
..Default::default()
}
}
}

View File

@@ -1 +0,0 @@
pub mod list_pools;

View File

@@ -1,83 +0,0 @@
use crate::error::ErrorCode;
use crate::Result as LocalResult;
use axum::Json;
use ecstore::new_object_layer_fn;
use serde::Serialize;
use time::OffsetDateTime;
#[derive(Serialize)]
pub struct PoolStatus {
id: i64,
cmdline: String,
#[serde(rename = "lastUpdate")]
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
last_updat: OffsetDateTime,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(rename = "decommissionInfo")]
decommission_info: Option<PoolDecommissionInfo>,
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct PoolDecommissionInfo {
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
start_time: OffsetDateTime,
start_size: i64,
total_size: i64,
current_size: i64,
complete: bool,
failed: bool,
canceled: bool,
#[serde(rename = "objectsDecommissioned")]
items_decommissioned: i64,
#[serde(rename = "objectsDecommissionedFailed")]
items_decommission_failed: i64,
#[serde(rename = "bytesDecommissioned")]
bytes_done: i64,
#[serde(rename = "bytesDecommissionedFailed")]
bytes_failed: i64,
}
pub async fn handler() -> LocalResult<Json<Vec<PoolStatus>>> {
// if ecstore::is_legacy().await {
// return Err(ErrorCode::ErrNotImplemented);
// }
//
//
// todo 实用oncelock作为全局变量
let Some(store) = new_object_layer_fn() else { return Err(ErrorCode::ErrNotImplemented) };
// todo, 调用pool.status()接口获取每个池的数据
//
let mut result = Vec::new();
for (idx, _pool) in store.pools.iter().enumerate() {
// 这里mock一下数据
result.push(PoolStatus {
id: idx as _,
cmdline: "cmdline".into(),
last_updat: OffsetDateTime::now_utc(),
decommission_info: if idx % 2 == 0 {
Some(PoolDecommissionInfo {
start_time: OffsetDateTime::now_utc(),
start_size: 1,
total_size: 2,
current_size: 2,
complete: true,
failed: true,
canceled: true,
items_decommissioned: 1,
items_decommission_failed: 1,
bytes_done: 1,
bytes_failed: 1,
})
} else {
None
},
})
}
Ok(Json(result))
}

View File

@@ -1,20 +0,0 @@
pub mod error;
pub mod handlers;
use axum::{extract::Request, response::Response, routing::get, BoxError, Router};
use error::ErrorCode;
use handlers::list_pools;
use tower::Service;
pub type Result<T> = std::result::Result<T, ErrorCode>;
const API_VERSION: &str = "/v3";
pub fn register_admin_router() -> impl Service<Request, Response = Response, Error: Into<BoxError>, Future: Send> + Clone {
Router::new()
.nest(
"/rustfs/admin",
Router::new().nest(API_VERSION, Router::new().route("/pools/list", get(list_pools::handler))),
)
.into_service()
}

View File

@@ -1,9 +1,10 @@
// automatically generated by the FlatBuffers compiler, do not modify
// @generated
use core::cmp::Ordering;
use core::mem;
use core::cmp::Ordering;
extern crate flatbuffers;
use self::flatbuffers::{EndianScalar, Follow};
@@ -11,114 +12,112 @@ use self::flatbuffers::{EndianScalar, Follow};
#[allow(unused_imports, dead_code)]
pub mod models {
use core::cmp::Ordering;
use core::mem;
use core::mem;
use core::cmp::Ordering;
extern crate flatbuffers;
use self::flatbuffers::{EndianScalar, Follow};
extern crate flatbuffers;
use self::flatbuffers::{EndianScalar, Follow};
pub enum PingBodyOffset {}
#[derive(Copy, Clone, PartialEq)]
pub enum PingBodyOffset {}
#[derive(Copy, Clone, PartialEq)]
pub struct PingBody<'a> {
pub _tab: flatbuffers::Table<'a>,
pub struct PingBody<'a> {
pub _tab: flatbuffers::Table<'a>,
}
impl<'a> flatbuffers::Follow<'a> for PingBody<'a> {
type Inner = PingBody<'a>;
#[inline]
unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
Self { _tab: flatbuffers::Table::new(buf, loc) }
}
}
impl<'a> PingBody<'a> {
pub const VT_PAYLOAD: flatbuffers::VOffsetT = 4;
pub const fn get_fully_qualified_name() -> &'static str {
"models.PingBody"
}
#[inline]
pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
PingBody { _tab: table }
}
#[allow(unused_mut)]
pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>(
_fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>,
args: &'args PingBodyArgs<'args>
) -> flatbuffers::WIPOffset<PingBody<'bldr>> {
let mut builder = PingBodyBuilder::new(_fbb);
if let Some(x) = args.payload { builder.add_payload(x); }
builder.finish()
}
#[inline]
pub fn payload(&self) -> Option<flatbuffers::Vector<'a, u8>> {
// Safety:
// Created from valid Table for this object
// which contains a valid value in this slot
unsafe { self._tab.get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, u8>>>(PingBody::VT_PAYLOAD, None)}
}
}
impl flatbuffers::Verifiable for PingBody<'_> {
#[inline]
fn run_verifier(
v: &mut flatbuffers::Verifier, pos: usize
) -> Result<(), flatbuffers::InvalidFlatbuffer> {
use self::flatbuffers::Verifiable;
v.visit_table(pos)?
.visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, u8>>>("payload", Self::VT_PAYLOAD, false)?
.finish();
Ok(())
}
}
pub struct PingBodyArgs<'a> {
pub payload: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, u8>>>,
}
impl<'a> Default for PingBodyArgs<'a> {
#[inline]
fn default() -> Self {
PingBodyArgs {
payload: None,
}
}
}
impl<'a> flatbuffers::Follow<'a> for PingBody<'a> {
type Inner = PingBody<'a>;
#[inline]
unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
Self {
_tab: flatbuffers::Table::new(buf, loc),
}
}
pub struct PingBodyBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> {
fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>,
start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
}
impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> PingBodyBuilder<'a, 'b, A> {
#[inline]
pub fn add_payload(&mut self, payload: flatbuffers::WIPOffset<flatbuffers::Vector<'b , u8>>) {
self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(PingBody::VT_PAYLOAD, payload);
}
#[inline]
pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> PingBodyBuilder<'a, 'b, A> {
let start = _fbb.start_table();
PingBodyBuilder {
fbb_: _fbb,
start_: start,
}
}
#[inline]
pub fn finish(self) -> flatbuffers::WIPOffset<PingBody<'a>> {
let o = self.fbb_.end_table(self.start_);
flatbuffers::WIPOffset::new(o.value())
}
}
impl<'a> PingBody<'a> {
pub const VT_PAYLOAD: flatbuffers::VOffsetT = 4;
impl core::fmt::Debug for PingBody<'_> {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
let mut ds = f.debug_struct("PingBody");
ds.field("payload", &self.payload());
ds.finish()
}
}
} // pub mod models
pub const fn get_fully_qualified_name() -> &'static str {
"models.PingBody"
}
#[inline]
pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
PingBody { _tab: table }
}
#[allow(unused_mut)]
pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>(
_fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>,
args: &'args PingBodyArgs<'args>,
) -> flatbuffers::WIPOffset<PingBody<'bldr>> {
let mut builder = PingBodyBuilder::new(_fbb);
if let Some(x) = args.payload {
builder.add_payload(x);
}
builder.finish()
}
#[inline]
pub fn payload(&self) -> Option<flatbuffers::Vector<'a, u8>> {
// Safety:
// Created from valid Table for this object
// which contains a valid value in this slot
unsafe {
self._tab
.get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, u8>>>(PingBody::VT_PAYLOAD, None)
}
}
}
impl flatbuffers::Verifiable for PingBody<'_> {
#[inline]
fn run_verifier(v: &mut flatbuffers::Verifier, pos: usize) -> Result<(), flatbuffers::InvalidFlatbuffer> {
use self::flatbuffers::Verifiable;
v.visit_table(pos)?
.visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, u8>>>("payload", Self::VT_PAYLOAD, false)?
.finish();
Ok(())
}
}
pub struct PingBodyArgs<'a> {
pub payload: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, u8>>>,
}
impl<'a> Default for PingBodyArgs<'a> {
#[inline]
fn default() -> Self {
PingBodyArgs { payload: None }
}
}
pub struct PingBodyBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> {
fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>,
start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
}
impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> PingBodyBuilder<'a, 'b, A> {
#[inline]
pub fn add_payload(&mut self, payload: flatbuffers::WIPOffset<flatbuffers::Vector<'b, u8>>) {
self.fbb_
.push_slot_always::<flatbuffers::WIPOffset<_>>(PingBody::VT_PAYLOAD, payload);
}
#[inline]
pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> PingBodyBuilder<'a, 'b, A> {
let start = _fbb.start_table();
PingBodyBuilder {
fbb_: _fbb,
start_: start,
}
}
#[inline]
pub fn finish(self) -> flatbuffers::WIPOffset<PingBody<'a>> {
let o = self.fbb_.end_table(self.start_);
flatbuffers::WIPOffset::new(o.value())
}
}
impl core::fmt::Debug for PingBody<'_> {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
let mut ds = f.debug_struct("PingBody");
ds.field("payload", &self.payload());
ds.finish()
}
}
} // pub mod models

File diff suppressed because it is too large Load Diff

82
docker-compose.yaml Normal file
View File

@@ -0,0 +1,82 @@
services:
node0:
image: rustfs:v1 # 替换为你的镜像名称和标签
container_name: node0
hostname: node0
environment:
- RUSTFS_VOLUMES=http://node{0...3}:9000/data/rustfs{0...3}
- RUSTFS_ADDRESS=0.0.0.0:9000
- RUSTFS_CONSOLE_ENABLE=true
- RUSTFS_CONSOLE_ADDRESS=0.0.0.0:9002
platform: linux/amd64
ports:
- "9000:9000" # 映射宿主机的 9001 端口到容器的 9000 端口
- "8000:9001" # 映射宿主机的 9001 端口到容器的 9000 端口
volumes:
- ./target/x86_64-unknown-linux-musl/release/rustfs:/app/rustfs
# - ./data/node0:/data # 将当前路径挂载到容器内的 /root/data
command: "/app/rustfs"
node1:
image: rustfs:v1
container_name: node1
hostname: node1
environment:
- RUSTFS_VOLUMES=http://node{0...3}:9000/data/rustfs{0...3}
- RUSTFS_ADDRESS=0.0.0.0:9000
- RUSTFS_CONSOLE_ENABLE=true
- RUSTFS_CONSOLE_ADDRESS=0.0.0.0:9002
platform: linux/amd64
ports:
- "9001:9000" # 映射宿主机的 9002 端口到容器的 9000 端口
volumes:
- ./target/x86_64-unknown-linux-musl/release/rustfs:/app/rustfs
# - ./data/node1:/data
command: "/app/rustfs"
node2:
image: rustfs:v1
container_name: node2
hostname: node2
environment:
- RUSTFS_VOLUMES=http://node{0...3}:9000/data/rustfs{0...3}
- RUSTFS_ADDRESS=0.0.0.0:9000
- RUSTFS_CONSOLE_ENABLE=true
- RUSTFS_CONSOLE_ADDRESS=0.0.0.0:9002
platform: linux/amd64
ports:
- "9002:9000" # 映射宿主机的 9003 端口到容器的 9000 端口
volumes:
- ./target/x86_64-unknown-linux-musl/release/rustfs:/app/rustfs
# - ./data/node2:/data
command: "/app/rustfs"
node3:
image: rustfs:v1
container_name: node3
hostname: node3
environment:
- RUSTFS_VOLUMES=http://node{0...3}:9000/data/rustfs{0...3}
- RUSTFS_ADDRESS=0.0.0.0:9000
- RUSTFS_CONSOLE_ENABLE=true
- RUSTFS_CONSOLE_ADDRESS=0.0.0.0:9002
platform: linux/amd64
ports:
- "9003:9000" # 映射宿主机的 9004 端口到容器的 9000 端口
volumes:
- ./target/x86_64-unknown-linux-musl/release/rustfs:/app/rustfs
# - ./data/node3:/data
command: "/app/rustfs"
2025-03-14T05:23:15.661154Z INFO ecstore::disk::os: reliable_rename rm dst failed. src_file_path: "/data/rustfs1/.rustfs.sys/tmp/c7fabb9c-48c8-4827-b5e2-13271c3867c3x1741929793/part.38", dst_file_path: "/data/rustfs1/.rustfs.sys/multipart/494d877741f5e87d5160dc4e1bd4fbdacda64559ea0b7d16cdbeed61f252b98f/a83dc20f-e73a-46d0-a02b-11b330ba6e7ex1741929773056730169/641d3efd-cca0-418e-983b-ca2d47652900/part.38", base_dir: "/data/rustfs1/.rustfs.sys/multipart", err: Os { code: 2, kind: NotFound, message: "No such file or directory" }
at ecstore/src/disk/os.rs:144
2025-03-14T05:23:15.953116Z INFO ecstore::disk::os: reliable_rename rm dst failed. src_file_path: "/data/rustfs3/.rustfs.sys/tmp/e712821f-bc3f-4ffe-8a0c-0daa379d00d4x1741929793/part.39", dst_file_path: "/data/rustfs3/.rustfs.sys/multipart/494d877741f5e87d5160dc4e1bd4fbdacda64559ea0b7d16cdbeed61f252b98f/a83dc20f-e73a-46d0-a02b-11b330ba6e7ex1741929773056730169/641d3efd-cca0-418e-983b-ca2d47652900/part.39", base_dir: "/data/rustfs3/.rustfs.sys/multipart", err: Os { code: 2, kind: NotFound, message: "No such file or directory" }
at ecstore/src/disk/os.rs:144
2025-03-14T05:23:15.953218Z INFO ecstore::disk::os: reliable_rename rm dst failed. src_file_path: "/data/rustfs2/.rustfs.sys/tmp/e712821f-bc3f-4ffe-8a0c-0daa379d00d4x1741929793/part.39", dst_file_path: "/data/rustfs2/.rustfs.sys/multipart/494d877741f5e87d5160dc4e1bd4fbdacda64559ea0b7d16cdbeed61f252b98f/a83dc20f-e73a-46d0-a02b-11b330ba6e7ex1741929773056730169/641d3efd-cca0-418e-983b-ca2d47652900/part.39", base_dir: "/data/rustfs2/.rustfs.sys/multipart", err: Os { code: 2, kind: NotFound, message: "No such file or directory" }
at ecstore/src/disk/os.rs:144

View File

@@ -17,7 +17,6 @@ blake2 = "0.10.6"
bytes.workspace = true
common.workspace = true
chrono.workspace = true
reader.workspace = true
glob = "0.3.2"
thiserror.workspace = true
flatbuffers.workspace = true
@@ -66,6 +65,8 @@ pin-project-lite.workspace = true
md-5.workspace = true
madmin.workspace = true
workers.workspace = true
reqwest = { workspace = true }
urlencoding = "2.1.3"
[target.'cfg(not(windows))'.dependencies]

View File

@@ -1,28 +1,19 @@
use crate::{
disk::{error::DiskError, DiskAPI, DiskStore, FileReader, FileWriter, Reader},
disk::{error::DiskError, Disk, DiskAPI},
erasure::{ReadAt, Writer},
error::{Error, Result},
io::{FileReader, FileWriter},
store_api::BitrotAlgorithm,
};
use blake2::Blake2b512;
use blake2::Digest as _;
use highway::{HighwayHash, HighwayHasher, Key};
use lazy_static::lazy_static;
use sha2::{digest::core_api::BlockSizeUser, Digest, Sha256};
use std::{
any::Any,
collections::HashMap,
io::{Cursor, Read},
};
use std::{any::Any, collections::HashMap, io::Cursor, sync::Arc};
use tokio::io::{AsyncReadExt as _, AsyncWriteExt};
use tracing::{error, info};
use tokio::{
spawn,
sync::mpsc::{self, Sender},
task::JoinHandle,
};
lazy_static! {
static ref BITROT_ALGORITHMS: HashMap<BitrotAlgorithm, &'static str> = {
let mut m = HashMap::new();
@@ -150,41 +141,41 @@ pub fn bitrot_algorithm_from_string(s: &str) -> BitrotAlgorithm {
pub type BitrotWriter = Box<dyn Writer + Send + 'static>;
pub async fn new_bitrot_writer(
disk: DiskStore,
orig_volume: &str,
volume: &str,
file_path: &str,
length: usize,
algo: BitrotAlgorithm,
shard_size: usize,
) -> Result<BitrotWriter> {
if algo == BitrotAlgorithm::HighwayHash256S {
return Ok(Box::new(
StreamingBitrotWriter::new(disk, orig_volume, volume, file_path, length, algo, shard_size).await?,
));
}
Ok(Box::new(WholeBitrotWriter::new(disk, volume, file_path, algo, shard_size)))
}
// pub async fn new_bitrot_writer(
// disk: DiskStore,
// orig_volume: &str,
// volume: &str,
// file_path: &str,
// length: usize,
// algo: BitrotAlgorithm,
// shard_size: usize,
// ) -> Result<BitrotWriter> {
// if algo == BitrotAlgorithm::HighwayHash256S {
// return Ok(Box::new(
// StreamingBitrotWriter::new(disk, orig_volume, volume, file_path, length, algo, shard_size).await?,
// ));
// }
// Ok(Box::new(WholeBitrotWriter::new(disk, volume, file_path, algo, shard_size)))
// }
pub type BitrotReader = Box<dyn ReadAt + Send>;
#[allow(clippy::too_many_arguments)]
pub fn new_bitrot_reader(
disk: DiskStore,
data: &[u8],
bucket: &str,
file_path: &str,
till_offset: usize,
algo: BitrotAlgorithm,
sum: &[u8],
shard_size: usize,
) -> BitrotReader {
if algo == BitrotAlgorithm::HighwayHash256S {
return Box::new(StreamingBitrotReader::new(disk, data, bucket, file_path, algo, till_offset, shard_size));
}
Box::new(WholeBitrotReader::new(disk, bucket, file_path, algo, till_offset, sum))
}
// #[allow(clippy::too_many_arguments)]
// pub fn new_bitrot_reader(
// disk: DiskStore,
// data: &[u8],
// bucket: &str,
// file_path: &str,
// till_offset: usize,
// algo: BitrotAlgorithm,
// sum: &[u8],
// shard_size: usize,
// ) -> BitrotReader {
// if algo == BitrotAlgorithm::HighwayHash256S {
// return Box::new(StreamingBitrotReader::new(disk, data, bucket, file_path, algo, till_offset, shard_size));
// }
// Box::new(WholeBitrotReader::new(disk, bucket, file_path, algo, till_offset, sum))
// }
pub async fn close_bitrot_writers(writers: &mut [Option<BitrotWriter>]) -> Result<()> {
for w in writers.iter_mut().flatten() {
@@ -194,13 +185,13 @@ pub async fn close_bitrot_writers(writers: &mut [Option<BitrotWriter>]) -> Resul
Ok(())
}
pub fn bitrot_writer_sum(w: &BitrotWriter) -> Vec<u8> {
if let Some(w) = w.as_any().downcast_ref::<WholeBitrotWriter>() {
return w.hash.clone().finalize();
}
// pub fn bitrot_writer_sum(w: &BitrotWriter) -> Vec<u8> {
// if let Some(w) = w.as_any().downcast_ref::<WholeBitrotWriter>() {
// return w.hash.clone().finalize();
// }
Vec::new()
}
// Vec::new()
// }
pub fn bitrot_shard_file_size(size: usize, shard_size: usize, algo: BitrotAlgorithm) -> usize {
if algo != BitrotAlgorithm::HighwayHash256S {
@@ -209,25 +200,25 @@ pub fn bitrot_shard_file_size(size: usize, shard_size: usize, algo: BitrotAlgori
size.div_ceil(shard_size) * algo.new_hasher().size() + size
}
pub fn bitrot_verify(
r: &mut Cursor<Vec<u8>>,
pub async fn bitrot_verify(
r: FileReader,
want_size: usize,
part_size: usize,
algo: BitrotAlgorithm,
want: Vec<u8>,
_want: Vec<u8>,
mut shard_size: usize,
) -> Result<()> {
if algo != BitrotAlgorithm::HighwayHash256S {
let mut h = algo.new_hasher();
h.update(r.get_ref());
let hash = h.finalize();
if hash != want {
info!("bitrot_verify except: {:?}, got: {:?}", want, hash);
return Err(Error::new(DiskError::FileCorrupt));
}
// if algo != BitrotAlgorithm::HighwayHash256S {
// let mut h = algo.new_hasher();
// h.update(r.get_ref());
// let hash = h.finalize();
// if hash != want {
// info!("bitrot_verify except: {:?}, got: {:?}", want, hash);
// return Err(Error::new(DiskError::FileCorrupt));
// }
return Ok(());
}
// return Ok(());
// }
let mut h = algo.new_hasher();
let mut hash_buf = vec![0; h.size()];
let mut left = want_size;
@@ -240,9 +231,11 @@ pub fn bitrot_verify(
return Err(Error::new(DiskError::FileCorrupt));
}
let mut r = r;
while left > 0 {
h.reset();
let n = r.read(&mut hash_buf)?;
let n = r.read_exact(&mut hash_buf).await?;
left -= n;
if left < shard_size {
@@ -250,7 +243,7 @@ pub fn bitrot_verify(
}
let mut buf = vec![0; shard_size];
let read = r.read(&mut buf)?;
let read = r.read_exact(&mut buf).await?;
h.update(buf);
left -= read;
let hash = h.clone().finalize();
@@ -263,249 +256,274 @@ pub fn bitrot_verify(
Ok(())
}
pub struct WholeBitrotWriter {
disk: DiskStore,
volume: String,
file_path: String,
_shard_size: usize,
pub hash: Hasher,
}
// pub struct WholeBitrotWriter {
// disk: DiskStore,
// volume: String,
// file_path: String,
// _shard_size: usize,
// pub hash: Hasher,
// }
impl WholeBitrotWriter {
pub fn new(disk: DiskStore, volume: &str, file_path: &str, algo: BitrotAlgorithm, shard_size: usize) -> Self {
WholeBitrotWriter {
disk,
volume: volume.to_string(),
file_path: file_path.to_string(),
_shard_size: shard_size,
hash: algo.new_hasher(),
}
}
}
// impl WholeBitrotWriter {
// pub fn new(disk: DiskStore, volume: &str, file_path: &str, algo: BitrotAlgorithm, shard_size: usize) -> Self {
// WholeBitrotWriter {
// disk,
// volume: volume.to_string(),
// file_path: file_path.to_string(),
// _shard_size: shard_size,
// hash: algo.new_hasher(),
// }
// }
// }
#[async_trait::async_trait]
impl Writer for WholeBitrotWriter {
fn as_any(&self) -> &dyn Any {
self
}
// #[async_trait::async_trait]
// impl Writer for WholeBitrotWriter {
// fn as_any(&self) -> &dyn Any {
// self
// }
async fn write(&mut self, buf: &[u8]) -> Result<()> {
let mut file = self.disk.append_file(&self.volume, &self.file_path).await?;
let _ = file.write(buf).await?;
self.hash.update(buf);
// async fn write(&mut self, buf: &[u8]) -> Result<()> {
// let mut file = self.disk.append_file(&self.volume, &self.file_path).await?;
// let _ = file.write(buf).await?;
// self.hash.update(buf);
Ok(())
}
}
// Ok(())
// }
// }
#[derive(Debug)]
pub struct WholeBitrotReader {
disk: DiskStore,
volume: String,
file_path: String,
_verifier: BitrotVerifier,
till_offset: usize,
buf: Option<Vec<u8>>,
}
// #[derive(Debug)]
// pub struct WholeBitrotReader {
// disk: DiskStore,
// volume: String,
// file_path: String,
// _verifier: BitrotVerifier,
// till_offset: usize,
// buf: Option<Vec<u8>>,
// }
impl WholeBitrotReader {
pub fn new(disk: DiskStore, volume: &str, file_path: &str, algo: BitrotAlgorithm, till_offset: usize, sum: &[u8]) -> Self {
Self {
disk,
volume: volume.to_string(),
file_path: file_path.to_string(),
_verifier: BitrotVerifier::new(algo, sum),
till_offset,
buf: None,
}
}
}
// impl WholeBitrotReader {
// pub fn new(disk: DiskStore, volume: &str, file_path: &str, algo: BitrotAlgorithm, till_offset: usize, sum: &[u8]) -> Self {
// Self {
// disk,
// volume: volume.to_string(),
// file_path: file_path.to_string(),
// _verifier: BitrotVerifier::new(algo, sum),
// till_offset,
// buf: None,
// }
// }
// }
#[async_trait::async_trait]
impl ReadAt for WholeBitrotReader {
async fn read_at(&mut self, offset: usize, length: usize) -> Result<(Vec<u8>, usize)> {
if self.buf.is_none() {
let buf_len = self.till_offset - offset;
let mut file = self.disk.read_file(&self.volume, &self.file_path).await?;
let mut buf = vec![0u8; buf_len];
file.read_at(offset, &mut buf).await?;
self.buf = Some(buf);
}
// #[async_trait::async_trait]
// impl ReadAt for WholeBitrotReader {
// async fn read_at(&mut self, offset: usize, length: usize) -> Result<(Vec<u8>, usize)> {
// if self.buf.is_none() {
// let buf_len = self.till_offset - offset;
// let mut file = self
// .disk
// .read_file_stream(&self.volume, &self.file_path, offset, length)
// .await?;
// let mut buf = vec![0u8; buf_len];
// file.read_at(offset, &mut buf).await?;
// self.buf = Some(buf);
// }
if let Some(buf) = &mut self.buf {
if buf.len() < length {
return Err(Error::new(DiskError::LessData));
}
// if let Some(buf) = &mut self.buf {
// if buf.len() < length {
// return Err(Error::new(DiskError::LessData));
// }
return Ok((buf.drain(0..length).collect::<Vec<_>>(), length));
}
// return Ok((buf.drain(0..length).collect::<Vec<_>>(), length));
// }
Err(Error::new(DiskError::LessData))
}
}
// Err(Error::new(DiskError::LessData))
// }
// }
struct StreamingBitrotWriter {
hasher: Hasher,
tx: Sender<Option<Vec<u8>>>,
task: Option<JoinHandle<()>>,
}
// struct StreamingBitrotWriter {
// hasher: Hasher,
// tx: Sender<Option<Vec<u8>>>,
// task: Option<JoinHandle<()>>,
// }
impl StreamingBitrotWriter {
pub async fn new(
disk: DiskStore,
orig_volume: &str,
volume: &str,
file_path: &str,
length: usize,
algo: BitrotAlgorithm,
shard_size: usize,
) -> Result<Self> {
let hasher = algo.new_hasher();
let (tx, mut rx) = mpsc::channel::<Option<Vec<u8>>>(10);
// impl StreamingBitrotWriter {
// pub async fn new(
// disk: DiskStore,
// orig_volume: &str,
// volume: &str,
// file_path: &str,
// length: usize,
// algo: BitrotAlgorithm,
// shard_size: usize,
// ) -> Result<Self> {
// let hasher = algo.new_hasher();
// let (tx, mut rx) = mpsc::channel::<Option<Vec<u8>>>(10);
let total_file_size = length.div_ceil(shard_size) * hasher.size() + length;
let mut writer = disk.create_file(orig_volume, volume, file_path, total_file_size).await?;
// let total_file_size = length.div_ceil(shard_size) * hasher.size() + length;
// let mut writer = disk.create_file(orig_volume, volume, file_path, total_file_size).await?;
let task = spawn(async move {
loop {
if let Some(Some(buf)) = rx.recv().await {
writer.write(&buf).await.unwrap();
continue;
}
// let task = spawn(async move {
// loop {
// if let Some(Some(buf)) = rx.recv().await {
// writer.write(&buf).await.unwrap();
// continue;
// }
break;
}
});
// break;
// }
// });
Ok(StreamingBitrotWriter {
hasher,
tx,
task: Some(task),
})
}
}
// Ok(StreamingBitrotWriter {
// hasher,
// tx,
// task: Some(task),
// })
// }
// }
// Writer impl for StreamingBitrotWriter: for every incoming shard it sends
// the shard's hash followed by the shard bytes over an mpsc channel; a
// background task (see `self.task`) drains the channel and writes to disk.
// NOTE(review): the commented-out duplicate lines below are the new side of a
// merge diff that disables this type — confirm against the commit intent.
#[async_trait::async_trait]
impl Writer for StreamingBitrotWriter {
fn as_any(&self) -> &dyn Any {
self
}
// #[async_trait::async_trait]
// impl Writer for StreamingBitrotWriter {
// fn as_any(&self) -> &dyn Any {
// self
// }
// Hash the shard, then queue hash + payload for the background writer task.
// An empty buffer is a no-op so zero-length writes cost nothing.
async fn write(&mut self, buf: &[u8]) -> Result<()> {
if buf.is_empty() {
return Ok(());
}
// Hasher is reused across shards, so it must be reset before each update;
// `clone().finalize()` keeps the original hasher usable for the next shard.
self.hasher.reset();
self.hasher.update(buf);
let hash_bytes = self.hasher.clone().finalize();
// Hash is written before the data — readers rely on this per-shard layout.
let _ = self.tx.send(Some(hash_bytes)).await?;
let _ = self.tx.send(Some(buf.to_vec())).await?;
// async fn write(&mut self, buf: &[u8]) -> Result<()> {
// if buf.is_empty() {
// return Ok(());
// }
// self.hasher.reset();
// self.hasher.update(buf);
// let hash_bytes = self.hasher.clone().finalize();
// let _ = self.tx.send(Some(hash_bytes)).await?;
// let _ = self.tx.send(Some(buf.to_vec())).await?;
Ok(())
}
// Ok(())
// }
// Send the `None` sentinel so the background task stops, then await it so
// all queued shards are flushed before close() returns.
async fn close(&mut self) -> Result<()> {
let _ = self.tx.send(None).await?;
if let Some(task) = self.task.take() {
let _ = task.await; // wait for the background flush task to finish
}
Ok(())
}
}
// async fn close(&mut self) -> Result<()> {
// let _ = self.tx.send(None).await?;
// if let Some(task) = self.task.take() {
// let _ = task.await; // wait for the background flush task to finish
// }
// Ok(())
// }
// }
// Reader counterpart of StreamingBitrotWriter: reads the whole
// hash-interleaved stream once, then verifies each shard's hash on read_at.
#[derive(Debug)]
struct StreamingBitrotReader {
disk: DiskStore,
_data: Vec<u8>,
volume: String,
file_path: String,
// Total stream length: per-shard hash prefixes plus payload (see `new`).
till_offset: usize,
// Next expected read offset; read_at enforces strictly sequential reads.
curr_offset: usize,
hasher: Hasher,
shard_size: usize,
// Lazily-filled copy of the on-disk stream, drained shard by shard.
buf: Vec<u8>,
// Scratch space sized to one hash; capacity doubles as the hash length.
hash_bytes: Vec<u8>,
}
// #[derive(Debug)]
// struct StreamingBitrotReader {
// disk: DiskStore,
// _data: Vec<u8>,
// volume: String,
// file_path: String,
// till_offset: usize,
// curr_offset: usize,
// hasher: Hasher,
// shard_size: usize,
// buf: Vec<u8>,
// hash_bytes: Vec<u8>,
// }
impl StreamingBitrotReader {
// Build a reader for a bitrot-protected stream.
// `till_offset` is the plain data length; the stored stream additionally
// carries one hash of `hasher.size()` bytes per shard, hence the
// `div_ceil(shard_size) * hasher.size() + till_offset` stream length
// (mirrors the writer's hash-then-data layout).
pub fn new(
disk: DiskStore,
data: &[u8],
volume: &str,
file_path: &str,
algo: BitrotAlgorithm,
till_offset: usize,
shard_size: usize,
) -> Self {
let hasher = algo.new_hasher();
Self {
disk,
_data: data.to_vec(),
volume: volume.to_string(),
file_path: file_path.to_string(),
till_offset: till_offset.div_ceil(shard_size) * hasher.size() + till_offset,
curr_offset: 0,
// capacity (not len) records the hash width used by read_at.
hash_bytes: Vec::with_capacity(hasher.size()),
hasher,
shard_size,
buf: Vec::new(),
}
}
}
// impl StreamingBitrotReader {
// pub fn new(
// disk: DiskStore,
// data: &[u8],
// volume: &str,
// file_path: &str,
// algo: BitrotAlgorithm,
// till_offset: usize,
// shard_size: usize,
// ) -> Self {
// let hasher = algo.new_hasher();
// Self {
// disk,
// _data: data.to_vec(),
// volume: volume.to_string(),
// file_path: file_path.to_string(),
// till_offset: till_offset.div_ceil(shard_size) * hasher.size() + till_offset,
// curr_offset: 0,
// hash_bytes: Vec::with_capacity(hasher.size()),
// hasher,
// shard_size,
// buf: Vec::new(),
// }
// }
// }
// read_at: returns `length` verified bytes starting at `offset`.
// Constraints enforced below: offsets must be shard-aligned and reads must be
// strictly sequential (the backing buffer is drained in order).
#[async_trait::async_trait]
impl ReadAt for StreamingBitrotReader {
async fn read_at(&mut self, offset: usize, length: usize) -> Result<(Vec<u8>, usize)> {
// Only shard-aligned offsets are valid: each shard is prefixed by its hash.
if offset % self.shard_size != 0 {
return Err(Error::new(DiskError::Unexpected));
}
// First call: slurp the rest of the stream (from the hash-adjusted
// position that corresponds to `offset`) into self.buf in one read.
if self.buf.is_empty() {
self.curr_offset = offset;
let stream_offset = (offset / self.shard_size) * self.hasher.size() + offset;
let buf_len = self.till_offset - stream_offset;
let mut file = self.disk.read_file(&self.volume, &self.file_path).await?;
let mut buf = vec![0u8; buf_len];
file.read_at(stream_offset, &mut buf).await?;
self.buf = buf;
}
// Non-sequential reads would desynchronize the drain position below.
if offset != self.curr_offset {
return Err(Error::new(DiskError::Unexpected));
}
// #[async_trait::async_trait]
// impl ReadAt for StreamingBitrotReader {
// async fn read_at(&mut self, offset: usize, length: usize) -> Result<(Vec<u8>, usize)> {
// if offset % self.shard_size != 0 {
// return Err(Error::new(DiskError::Unexpected));
// }
// if self.buf.is_empty() {
// self.curr_offset = offset;
// let stream_offset = (offset / self.shard_size) * self.hasher.size() + offset;
// let buf_len = self.till_offset - stream_offset;
// let mut file = self.disk.read_file(&self.volume, &self.file_path).await?;
// let mut buf = vec![0u8; buf_len];
// file.read_at(stream_offset, &mut buf).await?;
// self.buf = buf;
// }
// if offset != self.curr_offset {
// return Err(Error::new(DiskError::Unexpected));
// }
// Pop the stored hash (capacity == hash width), then the shard payload,
// and recompute the hash over the payload to detect bitrot.
self.hash_bytes = self.buf.drain(0..self.hash_bytes.capacity()).collect();
let buf = self.buf.drain(0..length).collect::<Vec<_>>();
self.hasher.reset();
self.hasher.update(&buf);
let actual = self.hasher.clone().finalize();
if actual != self.hash_bytes {
return Err(Error::new(DiskError::FileCorrupt));
}
// self.hash_bytes = self.buf.drain(0..self.hash_bytes.capacity()).collect();
// let buf = self.buf.drain(0..length).collect::<Vec<_>>();
// self.hasher.reset();
// self.hasher.update(&buf);
// let actual = self.hasher.clone().finalize();
// if actual != self.hash_bytes {
// return Err(Error::new(DiskError::FileCorrupt));
// }
// Advance the sequential cursor by the bytes actually returned.
let readed_len = buf.len();
self.curr_offset += readed_len;
// let readed_len = buf.len();
// self.curr_offset += readed_len;
Ok((buf, readed_len))
}
}
// Ok((buf, readed_len))
// }
// }
// Bitrot-hashing writer over a disk file, or an in-memory buffer when the
// object is small enough to be stored inline (`inline` / `inline_data`).
// NOTE(review): this struct declares `inner` twice — the diff interleaves the
// old field (`pub inner: FileWriter`) with its replacement
// (`inner: Option<FileWriter>`). As written it cannot compile; the old line
// must be dropped when the merge is resolved.
pub struct BitrotFileWriter {
pub inner: FileWriter,
inner: Option<FileWriter>,
hasher: Hasher,
_shard_size: usize,
// When true, bytes are accumulated in `inline_data` instead of a file.
inline: bool,
inline_data: Vec<u8>,
}
impl BitrotFileWriter {
pub fn new(inner: FileWriter, algo: BitrotAlgorithm, _shard_size: usize) -> Self {
pub async fn new(
disk: Arc<Disk>,
volume: &str,
path: &str,
inline: bool,
algo: BitrotAlgorithm,
_shard_size: usize,
) -> Result<Self> {
let inner = if !inline {
Some(disk.create_file("", volume, path, 0).await?)
} else {
None
};
let hasher = algo.new_hasher();
Self {
Ok(Self {
inner,
inline,
inline_data: Vec::new(),
hasher,
_shard_size,
}
})
}
pub fn writer(&self) -> &FileWriter {
&self.inner
// pub fn writer(&self) -> &FileWriter {
// &self.inner
// }
pub fn inline_data(&self) -> &[u8] {
&self.inline_data
}
}
@@ -522,21 +540,50 @@ impl Writer for BitrotFileWriter {
self.hasher.reset();
self.hasher.update(buf);
let hash_bytes = self.hasher.clone().finalize();
let _ = self.inner.write(&hash_bytes).await?;
let _ = self.inner.write(buf).await?;
if let Some(f) = self.inner.as_mut() {
f.write_all(&hash_bytes).await?;
f.write_all(buf).await?;
} else {
self.inline_data.extend_from_slice(&hash_bytes);
self.inline_data.extend_from_slice(buf);
}
Ok(())
}
// Finish the writer. Inline writers keep their bytes in memory, so there is
// nothing to flush; file-backed writers shut down the underlying file handle.
// NOTE(review): the `self.inline` early return and the `if let` guard overlap
// (inline writers have `inner == None`) — likely both diff sides; harmless but
// redundant.
async fn close(&mut self) -> Result<()> {
if self.inline {
return Ok(());
}
if let Some(f) = self.inner.as_mut() {
f.shutdown().await?;
}
Ok(())
}
}
pub fn new_bitrot_filewriter(inner: FileWriter, algo: BitrotAlgorithm, shard_size: usize) -> BitrotWriter {
Box::new(BitrotFileWriter::new(inner, algo, shard_size))
pub async fn new_bitrot_filewriter(
disk: Arc<Disk>,
volume: &str,
path: &str,
inline: bool,
algo: BitrotAlgorithm,
shard_size: usize,
) -> Result<BitrotWriter> {
let w = BitrotFileWriter::new(disk, volume, path, inline, algo, shard_size).await?;
Ok(Box::new(w))
}
#[derive(Debug)]
struct BitrotFileReader {
pub inner: FileReader,
// till_offset: usize,
disk: Arc<Disk>,
data: Option<Vec<u8>>,
volume: String,
file_path: String,
reader: Option<FileReader>,
till_offset: usize,
curr_offset: usize,
hasher: Hasher,
shard_size: usize,
@@ -545,28 +592,41 @@ struct BitrotFileReader {
read_buf: Vec<u8>,
}
// fn ceil(a: usize, b: usize) -> usize {
// (a + b - 1) / b
// }
/// Integer ceiling division: the smallest `q` with `q * b >= a`.
///
/// Panics if `b == 0` (same as ordinary integer division).
fn ceil(a: usize, b: usize) -> usize {
    let quotient = a / b;
    if a % b == 0 {
        quotient
    } else {
        quotient + 1
    }
}
impl BitrotFileReader {
pub fn new(inner: FileReader, algo: BitrotAlgorithm, _till_offset: usize, shard_size: usize) -> Self {
pub fn new(
disk: Arc<Disk>,
data: Option<Vec<u8>>,
volume: String,
file_path: String,
algo: BitrotAlgorithm,
till_offset: usize,
shard_size: usize,
) -> Self {
let hasher = algo.new_hasher();
Self {
inner,
// till_offset: ceil(till_offset, shard_size) * hasher.size() + till_offset,
disk,
data,
volume,
file_path,
till_offset: ceil(till_offset, shard_size) * hasher.size() + till_offset,
curr_offset: 0,
hash_bytes: vec![0u8; hasher.size()],
hasher,
shard_size,
// buf: Vec::new(),
read_buf: Vec::new(),
reader: None,
}
}
}
#[async_trait::async_trait]
impl ReadAt for BitrotFileReader {
// 读取数据
async fn read_at(&mut self, offset: usize, length: usize) -> Result<(Vec<u8>, usize)> {
if offset % self.shard_size != 0 {
error!(
@@ -578,53 +638,108 @@ impl ReadAt for BitrotFileReader {
return Err(Error::new(DiskError::Unexpected));
}
let stream_offset = (offset / self.shard_size) * self.hasher.size() + offset;
let buf_len = self.hasher.size() + length;
if self.reader.is_none() {
self.curr_offset = offset;
let stream_offset = (offset / self.shard_size) * self.hasher.size() + offset;
if let Some(data) = self.data.clone() {
self.reader = Some(Box::new(Cursor::new(data)));
} else {
self.reader = Some(
self.disk
.read_file_stream(&self.volume, &self.file_path, stream_offset, self.till_offset - stream_offset)
.await?,
);
}
}
if offset != self.curr_offset {
error!("BitrotFileReader read_at offset != self.curr_offset, {} != {}", offset, self.curr_offset);
return Err(Error::new(DiskError::Unexpected));
}
let reader = self.reader.as_mut().unwrap();
// let mut hash_buf = self.hash_bytes;
self.hash_bytes.clear();
self.hash_bytes.resize(self.hasher.size(), 0u8);
reader.read_exact(&mut self.hash_bytes).await?;
self.read_buf.clear();
self.read_buf.resize(buf_len, 0u8);
self.read_buf.resize(length, 0u8);
self.inner.read_at(stream_offset, &mut self.read_buf).await?;
let hash_bytes = &self.read_buf.as_slice()[0..self.hash_bytes.capacity()];
self.hash_bytes.clone_from_slice(hash_bytes);
let buf = self.read_buf.as_slice()[self.hash_bytes.capacity()..self.hash_bytes.capacity() + length].to_vec();
reader.read_exact(&mut self.read_buf).await?;
self.hasher.reset();
self.hasher.update(&buf);
self.hasher.update(&self.read_buf);
let actual = self.hasher.clone().finalize();
if actual != self.hash_bytes {
error!(
"BitrotFileReader read_at actual != self.hash_bytes, {:?} != {:?}",
actual, self.hash_bytes
);
return Err(Error::new(DiskError::FileCorrupt));
}
let readed_len = buf.len();
let readed_len = self.read_buf.len();
self.curr_offset += readed_len;
Ok((buf, readed_len))
Ok((self.read_buf.clone(), readed_len))
// let stream_offset = (offset / self.shard_size) * self.hasher.size() + offset;
// let buf_len = self.hasher.size() + length;
// self.read_buf.clear();
// self.read_buf.resize(buf_len, 0u8);
// self.inner.read_at(stream_offset, &mut self.read_buf).await?;
// let hash_bytes = &self.read_buf.as_slice()[0..self.hash_bytes.capacity()];
// self.hash_bytes.clone_from_slice(hash_bytes);
// let buf = self.read_buf.as_slice()[self.hash_bytes.capacity()..self.hash_bytes.capacity() + length].to_vec();
// self.hasher.reset();
// self.hasher.update(&buf);
// let actual = self.hasher.clone().finalize();
// if actual != self.hash_bytes {
// return Err(Error::new(DiskError::FileCorrupt));
// }
// let readed_len = buf.len();
// self.curr_offset += readed_len;
// Ok((buf, readed_len))
}
}
pub fn new_bitrot_filereader(inner: FileReader, till_offset: usize, algo: BitrotAlgorithm, shard_size: usize) -> BitrotReader {
Box::new(BitrotFileReader::new(inner, algo, till_offset, shard_size))
pub fn new_bitrot_filereader(
disk: Arc<Disk>,
data: Option<Vec<u8>>,
volume: String,
file_path: String,
till_offset: usize,
algo: BitrotAlgorithm,
shard_size: usize,
) -> BitrotReader {
Box::new(BitrotFileReader::new(disk, data, volume, file_path, algo, till_offset, shard_size))
}
#[cfg(test)]
mod test {
use std::{collections::HashMap, fs};
use std::collections::HashMap;
use hex_simd::decode_to_vec;
use tempfile::TempDir;
use crate::{
bitrot::{new_bitrot_writer, BITROT_ALGORITHMS},
disk::{endpoint::Endpoint, error::DiskError, new_disk, DiskAPI, DiskOption},
disk::error::DiskError,
error::{Error, Result},
store_api::BitrotAlgorithm,
};
use super::{bitrot_writer_sum, new_bitrot_reader};
// use super::{bitrot_writer_sum, new_bitrot_reader};
#[test]
fn bitrot_self_test() -> Result<()> {
@@ -674,47 +789,47 @@ mod test {
Ok(())
}
#[tokio::test]
async fn test_all_bitrot_algorithms() -> Result<()> {
for algo in BITROT_ALGORITHMS.keys() {
test_bitrot_reader_writer_algo(algo.clone()).await?;
}
// #[tokio::test]
// async fn test_all_bitrot_algorithms() -> Result<()> {
// for algo in BITROT_ALGORITHMS.keys() {
// test_bitrot_reader_writer_algo(algo.clone()).await?;
// }
Ok(())
}
// Ok(())
// }
async fn test_bitrot_reader_writer_algo(algo: BitrotAlgorithm) -> Result<()> {
let temp_dir = TempDir::new().unwrap().path().to_string_lossy().to_string();
fs::create_dir_all(&temp_dir)?;
let volume = "testvol";
let file_path = "testfile";
// async fn test_bitrot_reader_writer_algo(algo: BitrotAlgorithm) -> Result<()> {
// let temp_dir = TempDir::new().unwrap().path().to_string_lossy().to_string();
// fs::create_dir_all(&temp_dir)?;
// let volume = "testvol";
// let file_path = "testfile";
let ep = Endpoint::try_from(temp_dir.as_str())?;
let opt = DiskOption::default();
let disk = new_disk(&ep, &opt).await?;
disk.make_volume(volume).await?;
let mut writer = new_bitrot_writer(disk.clone(), "", volume, file_path, 35, algo.clone(), 10).await?;
// let ep = Endpoint::try_from(temp_dir.as_str())?;
// let opt = DiskOption::default();
// let disk = new_disk(&ep, &opt).await?;
// disk.make_volume(volume).await?;
// let mut writer = new_bitrot_writer(disk.clone(), "", volume, file_path, 35, algo.clone(), 10).await?;
writer.write(b"aaaaaaaaaa").await?;
writer.write(b"aaaaaaaaaa").await?;
writer.write(b"aaaaaaaaaa").await?;
writer.write(b"aaaaa").await?;
// writer.write(b"aaaaaaaaaa").await?;
// writer.write(b"aaaaaaaaaa").await?;
// writer.write(b"aaaaaaaaaa").await?;
// writer.write(b"aaaaa").await?;
let sum = bitrot_writer_sum(&writer);
writer.close().await?;
// let sum = bitrot_writer_sum(&writer);
// writer.close().await?;
let mut reader = new_bitrot_reader(disk, b"", volume, file_path, 35, algo, &sum, 10);
let read_len = 10;
let mut result: Vec<u8>;
(result, _) = reader.read_at(0, read_len).await?;
assert_eq!(result, b"aaaaaaaaaa");
(result, _) = reader.read_at(10, read_len).await?;
assert_eq!(result, b"aaaaaaaaaa");
(result, _) = reader.read_at(20, read_len).await?;
assert_eq!(result, b"aaaaaaaaaa");
(result, _) = reader.read_at(30, read_len / 2).await?;
assert_eq!(result, b"aaaaa");
// let mut reader = new_bitrot_reader(disk, b"", volume, file_path, 35, algo, &sum, 10);
// let read_len = 10;
// let mut result: Vec<u8>;
// (result, _) = reader.read_at(0, read_len).await?;
// assert_eq!(result, b"aaaaaaaaaa");
// (result, _) = reader.read_at(10, read_len).await?;
// assert_eq!(result, b"aaaaaaaaaa");
// (result, _) = reader.read_at(20, read_len).await?;
// assert_eq!(result, b"aaaaaaaaaa");
// (result, _) = reader.read_at(30, read_len / 2).await?;
// assert_eq!(result, b"aaaaa");
Ok(())
}
// Ok(())
// }
}

View File

@@ -14,7 +14,7 @@ use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use time::OffsetDateTime;
use tracing::{error, info};
use tracing::error;
use crate::config::common::{read_config, save_config};
use crate::error::{Error, Result};
@@ -311,7 +311,7 @@ impl BucketMetadata {
buf.extend_from_slice(&data);
save_config(store, self.save_file_path().as_str(), &buf).await?;
save_config(store, self.save_file_path().as_str(), buf).await?;
Ok(())
}
@@ -367,7 +367,7 @@ pub async fn load_bucket_metadata_parse(api: Arc<ECStore>, bucket: &str, parse:
return Err(err);
}
info!("bucketmeta {} not found with err {:?}, start to init ", bucket, &err);
// info!("bucketmeta {} not found with err {:?}, start to init ", bucket, &err);
BucketMetadata::new(bucket)
}

View File

@@ -164,7 +164,7 @@ pub async fn list_path_raw(mut rx: B_Receiver<bool>, opts: ListPathRawOptions) -
let entry = match r.peek().await {
Ok(res) => {
if let Some(entry) = res {
info!("read entry disk: {}, name: {}", i, entry.name);
// info!("read entry disk: {}, name: {}", i, entry.name);
entry
} else {
// eof

View File

@@ -1,6 +1,3 @@
use std::collections::HashSet;
use std::sync::Arc;
use super::error::{is_err_config_not_found, ConfigError};
use super::{storageclass, Config, GLOBAL_StorageClass, KVS};
use crate::disk::RUSTFS_META_BUCKET;
@@ -10,8 +7,9 @@ use crate::store_err::is_err_object_not_found;
use crate::utils::path::SLASH_SEPARATOR;
use http::HeaderMap;
use lazy_static::lazy_static;
use s3s::dto::StreamingBlob;
use s3s::Body;
use std::collections::HashSet;
use std::io::Cursor;
use std::sync::Arc;
use tracing::{error, warn};
pub const CONFIG_PREFIX: &str = "config";
@@ -59,7 +57,7 @@ pub async fn read_config_with_metadata<S: StorageAPI>(
Ok((data, rd.object_info))
}
pub async fn save_config<S: StorageAPI>(api: Arc<S>, file: &str, data: &[u8]) -> Result<()> {
pub async fn save_config<S: StorageAPI>(api: Arc<S>, file: &str, data: Vec<u8>) -> Result<()> {
save_config_with_opts(
api,
file,
@@ -96,14 +94,10 @@ pub async fn delete_config<S: StorageAPI>(api: Arc<S>, file: &str) -> Result<()>
}
}
async fn save_config_with_opts<S: StorageAPI>(api: Arc<S>, file: &str, data: &[u8], opts: &ObjectOptions) -> Result<()> {
async fn save_config_with_opts<S: StorageAPI>(api: Arc<S>, file: &str, data: Vec<u8>, opts: &ObjectOptions) -> Result<()> {
let size = data.len();
let _ = api
.put_object(
RUSTFS_META_BUCKET,
file,
&mut PutObjReader::new(StreamingBlob::from(Body::from(data.to_vec())), data.len()),
opts,
)
.put_object(RUSTFS_META_BUCKET, file, &mut PutObjReader::new(Box::new(Cursor::new(data)), size), opts)
.await?;
Ok(())
}
@@ -174,7 +168,7 @@ async fn save_server_config<S: StorageAPI>(api: Arc<S>, cfg: &Config) -> Result<
let config_file = format!("{}{}{}", CONFIG_PREFIX, SLASH_SEPARATOR, CONFIG_FILE);
save_config(api, &config_file, data.as_slice()).await
save_config(api, &config_file, data).await
}
pub async fn lookup_configs<S: StorageAPI>(cfg: &mut Config, api: Arc<S>) {

View File

@@ -5,9 +5,9 @@ use super::error::{
use super::os::{is_root_disk, rename_all};
use super::{endpoint::Endpoint, error::DiskError, format::FormatV3};
use super::{
os, CheckPartsResp, DeleteOptions, DiskAPI, DiskInfo, DiskInfoOptions, DiskLocation, DiskMetrics, FileInfoVersions,
FileReader, FileWriter, Info, MetaCacheEntry, ReadMultipleReq, ReadMultipleResp, ReadOptions, RenameDataResp,
UpdateMetadataOpts, VolumeInfo, WalkDirOptions, BUCKET_META_PREFIX, RUSTFS_META_BUCKET, STORAGE_FORMAT_FILE_BACKUP,
os, CheckPartsResp, DeleteOptions, DiskAPI, DiskInfo, DiskInfoOptions, DiskLocation, DiskMetrics, FileInfoVersions, Info,
MetaCacheEntry, ReadMultipleReq, ReadMultipleResp, ReadOptions, RenameDataResp, UpdateMetadataOpts, VolumeInfo,
WalkDirOptions, BUCKET_META_PREFIX, RUSTFS_META_BUCKET, STORAGE_FORMAT_FILE_BACKUP,
};
use crate::bitrot::bitrot_verify;
use crate::bucket::metadata_sys::{self};
@@ -17,7 +17,7 @@ use crate::disk::error::{
is_sys_err_not_dir, map_err_not_exists, os_err_to_file_err,
};
use crate::disk::os::{check_path_length, is_empty_dir};
use crate::disk::{LocalFileReader, LocalFileWriter, STORAGE_FORMAT_FILE};
use crate::disk::STORAGE_FORMAT_FILE;
use crate::error::{Error, Result};
use crate::file_meta::{get_file_info, read_xl_meta_no_data, FileInfoOpts};
use crate::global::{GLOBAL_IsErasureSD, GLOBAL_RootDiskThreshold};
@@ -27,6 +27,7 @@ use crate::heal::data_usage_cache::{DataUsageCache, DataUsageEntry};
use crate::heal::error::{ERR_IGNORE_FILE_CONTRIB, ERR_SKIP_FILE};
use crate::heal::heal_commands::{HealScanMode, HealingTracker};
use crate::heal::heal_ops::HEALING_TRACKER_FILENAME;
use crate::io::{FileReader, FileWriter};
use crate::metacache::writer::MetacacheWriter;
use crate::new_object_layer_fn;
use crate::set_disk::{
@@ -49,7 +50,8 @@ use common::defer;
use path_absolutize::Absolutize;
use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use std::io::Cursor;
use std::io::SeekFrom;
use std::os::unix::fs::MetadataExt;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use std::time::{Duration, SystemTime};
@@ -59,7 +61,7 @@ use std::{
};
use time::OffsetDateTime;
use tokio::fs::{self, File};
use tokio::io::{AsyncReadExt, AsyncWrite, AsyncWriteExt, ErrorKind};
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWrite, AsyncWriteExt, ErrorKind};
use tokio::sync::mpsc::Sender;
use tokio::sync::RwLock;
use tracing::{error, info, warn};
@@ -325,7 +327,7 @@ impl LocalDisk {
}
}
// FIXME: 先清空回收站吧,有时间再添加判断逻辑
// TODO: 优化 FIXME: 先清空回收站吧,有时间再添加判断逻辑
if let Err(err) = {
if trash_path.is_dir() {
@@ -735,13 +737,16 @@ impl LocalDisk {
sum: &[u8],
shard_size: usize,
) -> Result<()> {
let mut file = utils::fs::open_file(part_path, O_CREATE | O_WRONLY)
let file = utils::fs::open_file(part_path, O_CREATE | O_WRONLY)
.await
.map_err(os_err_to_file_err)?;
let mut data = Vec::new();
let n = file.read_to_end(&mut data).await?;
bitrot_verify(&mut Cursor::new(data), n, part_size, algo, sum.to_vec(), shard_size)
// let mut data = Vec::new();
// let n = file.read_to_end(&mut data).await?;
let meta = file.metadata().await?;
bitrot_verify(Box::new(file), meta.size() as usize, part_size, algo, sum.to_vec(), shard_size).await
}
async fn scan_dir<W: AsyncWrite + Unpin>(
@@ -1285,6 +1290,7 @@ impl DiskAPI for LocalDisk {
Ok(resp)
}
#[tracing::instrument(level = "debug", skip(self))]
async fn rename_part(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str, meta: Vec<u8>) -> Result<()> {
let src_volume_dir = self.get_bucket_path(src_volume)?;
let dst_volume_dir = self.get_bucket_path(dst_volume)?;
@@ -1299,12 +1305,18 @@ impl DiskAPI for LocalDisk {
let dst_is_dir = has_suffix(dst_path, SLASH_SEPARATOR);
if !src_is_dir && dst_is_dir || src_is_dir && !dst_is_dir {
warn!(
"rename_part src and dst must be both dir or file src_is_dir:{}, dst_is_dir:{}",
src_is_dir, dst_is_dir
);
return Err(Error::from(DiskError::FileAccessDenied));
}
let src_file_path = src_volume_dir.join(Path::new(src_path));
let dst_file_path = dst_volume_dir.join(Path::new(dst_path));
// warn!("rename_part src_file_path:{:?}, dst_file_path:{:?}", &src_file_path, &dst_file_path);
check_path_length(src_file_path.to_string_lossy().as_ref())?;
check_path_length(dst_file_path.to_string_lossy().as_ref())?;
@@ -1325,12 +1337,14 @@ impl DiskAPI for LocalDisk {
if let Some(meta) = meta_op {
if !meta.is_dir() {
warn!("rename_part src is not dir {:?}", &src_file_path);
return Err(Error::new(DiskError::FileAccessDenied));
}
}
if let Err(e) = utils::fs::remove(&dst_file_path).await {
if is_sys_err_not_empty(&e) || is_sys_err_not_dir(&e) {
warn!("rename_part remove dst failed {:?} err {:?}", &dst_file_path, e);
return Err(Error::new(DiskError::FileAccessDenied));
} else if is_sys_err_io(&e) {
return Err(Error::new(DiskError::FaultyDisk));
@@ -1343,6 +1357,7 @@ impl DiskAPI for LocalDisk {
if let Err(err) = os::rename_all(&src_file_path, &dst_file_path, &dst_volume_dir).await {
if let Some(e) = err.to_io_err() {
if is_sys_err_not_empty(&e) || is_sys_err_not_dir(&e) {
warn!("rename_part rename all failed {:?} err {:?}", &dst_file_path, e);
return Err(Error::new(DiskError::FileAccessDenied));
}
@@ -1455,8 +1470,10 @@ impl DiskAPI for LocalDisk {
Ok(())
}
// TODO: use io.reader
#[tracing::instrument(level = "debug", skip(self))]
async fn create_file(&self, origvolume: &str, volume: &str, path: &str, _file_size: usize) -> Result<FileWriter> {
// warn!("disk create_file: origvolume: {}, volume: {}, path: {}", origvolume, volume, path);
if !origvolume.is_empty() {
let origvolume_dir = self.get_bucket_path(origvolume)?;
if !skip_access_checks(origvolume) {
@@ -1479,12 +1496,16 @@ impl DiskAPI for LocalDisk {
.await
.map_err(os_err_to_file_err)?;
Ok(FileWriter::Local(LocalFileWriter::new(f)))
Ok(Box::new(f))
// Ok(())
}
#[tracing::instrument(level = "debug", skip(self))]
// async fn append_file(&self, volume: &str, path: &str, mut r: DuplexStream) -> Result<File> {
async fn append_file(&self, volume: &str, path: &str) -> Result<FileWriter> {
warn!("disk append_file: volume: {}, path: {}", volume, path);
let volume_dir = self.get_bucket_path(volume)?;
if !skip_access_checks(volume) {
if let Err(e) = utils::fs::access(&volume_dir).await {
@@ -1497,11 +1518,13 @@ impl DiskAPI for LocalDisk {
let f = self.open_file(file_path, O_CREATE | O_APPEND | O_WRONLY, volume_dir).await?;
Ok(FileWriter::Local(LocalFileWriter::new(f)))
Ok(Box::new(f))
}
// TODO: io verifier
#[tracing::instrument(level = "debug", skip(self))]
async fn read_file(&self, volume: &str, path: &str) -> Result<FileReader> {
// warn!("disk read_file: volume: {}, path: {}", volume, path);
let volume_dir = self.get_bucket_path(volume)?;
if !skip_access_checks(volume) {
if let Err(e) = utils::fs::access(&volume_dir).await {
@@ -1530,9 +1553,59 @@ impl DiskAPI for LocalDisk {
}
})?;
Ok(FileReader::Local(LocalFileReader::new(f)))
Ok(Box::new(f))
}
#[tracing::instrument(level = "debug", skip(self))]
async fn read_file_stream(&self, volume: &str, path: &str, offset: usize, length: usize) -> Result<FileReader> {
// warn!(
// "disk read_file_stream: volume: {}, path: {}, offset: {}, length: {}",
// volume, path, offset, length
// );
let volume_dir = self.get_bucket_path(volume)?;
if !skip_access_checks(volume) {
if let Err(e) = utils::fs::access(&volume_dir).await {
return Err(convert_access_error(e, DiskError::VolumeAccessDenied));
}
}
let file_path = volume_dir.join(Path::new(&path));
check_path_length(file_path.to_string_lossy().to_string().as_str())?;
let mut f = self.open_file(file_path, O_RDONLY, volume_dir).await.map_err(|err| {
if let Some(e) = err.to_io_err() {
if os_is_not_exist(&e) {
Error::new(DiskError::FileNotFound)
} else if os_is_permission(&e) || is_sys_err_not_dir(&e) {
Error::new(DiskError::FileAccessDenied)
} else if is_sys_err_io(&e) {
Error::new(DiskError::FaultyDisk)
} else if is_sys_err_too_many_files(&e) {
Error::new(DiskError::TooManyOpenFiles)
} else {
Error::new(e)
}
} else {
err
}
})?;
let meta = f.metadata().await?;
if meta.len() < (offset + length) as u64 {
error!(
"read_file_stream: file size is less than offset + length {} + {} = {}",
offset,
length,
meta.len()
);
return Err(Error::new(DiskError::FileCorrupt));
}
f.seek(SeekFrom::Start(offset as u64)).await?;
Ok(Box::new(f))
}
#[tracing::instrument(level = "debug", skip(self))]
async fn list_dir(&self, origvolume: &str, volume: &str, dir_path: &str, count: i32) -> Result<Vec<String>> {
if !origvolume.is_empty() {
@@ -1676,7 +1749,7 @@ impl DiskAPI for LocalDisk {
return Err(os_err_to_file_err(e));
}
info!("read xl.meta failed, dst_file_path: {:?}, err: {:?}", dst_file_path, e);
// info!("read xl.meta failed, dst_file_path: {:?}, err: {:?}", dst_file_path, e);
None
}
};
@@ -2175,7 +2248,6 @@ impl DiskAPI for LocalDisk {
}
async fn delete_volume(&self, volume: &str) -> Result<()> {
info!("delete_volume, volume: {}", volume);
let p = self.get_bucket_path(volume)?;
// TODO: 不能用递归删除如果目录下面有文件返回errVolumeNotEmpty
@@ -2219,6 +2291,9 @@ impl DiskAPI for LocalDisk {
self.scanning.fetch_add(1, Ordering::SeqCst);
defer!(|| { self.scanning.fetch_sub(1, Ordering::SeqCst) });
// must befor metadata_sys
let Some(store) = new_object_layer_fn() else { return Err(Error::msg("errServerNotInitialized")) };
// Check if the current bucket has replication configuration
if let Ok((rcfg, _)) = metadata_sys::get_replication_config(&cache.info.name).await {
if has_active_rules(&rcfg, "", true) {
@@ -2226,7 +2301,6 @@ impl DiskAPI for LocalDisk {
}
}
let Some(store) = new_object_layer_fn() else { return Err(Error::msg("errServerNotInitialized")) };
let loc = self.get_disk_location();
let disks = store.get_disks(loc.pool_idx.unwrap(), loc.disk_idx.unwrap()).await?;
let disk = Arc::new(LocalDisk::new(&self.endpoint(), false).await?);

View File

@@ -14,10 +14,8 @@ pub const FORMAT_CONFIG_FILE: &str = "format.json";
pub const STORAGE_FORMAT_FILE: &str = "xl.meta";
pub const STORAGE_FORMAT_FILE_BACKUP: &str = "xl.meta.bkp";
use crate::utils::proto_err_to_err;
use crate::{
bucket::{metadata_sys::get_versioning_config, versioning::VersioningApi},
erasure::Writer,
error::{Error, Result},
file_meta::{merge_file_meta_versions, FileMeta, FileMetaShallowVersion, VersionType},
heal::{
@@ -25,36 +23,19 @@ use crate::{
data_usage_cache::{DataUsageCache, DataUsageEntry},
heal_commands::{HealScanMode, HealingTracker},
},
io::{FileReader, FileWriter},
store_api::{FileInfo, ObjectInfo, RawFileInfo},
utils::path::SLASH_SEPARATOR,
};
use endpoint::Endpoint;
use error::DiskError;
use futures::StreamExt;
use local::LocalDisk;
use madmin::info_commands::DiskMetrics;
use protos::proto_gen::node_service::{
node_service_client::NodeServiceClient, ReadAtRequest, ReadAtResponse, WriteRequest, WriteResponse,
};
use remote::RemoteDisk;
use serde::{Deserialize, Serialize};
use std::{
any::Any,
cmp::Ordering,
fmt::Debug,
io::{Cursor, SeekFrom},
path::PathBuf,
sync::Arc,
};
use std::{cmp::Ordering, fmt::Debug, path::PathBuf, sync::Arc};
use time::OffsetDateTime;
use tokio::{
fs::File,
io::{AsyncReadExt, AsyncSeekExt, AsyncWrite, AsyncWriteExt},
sync::mpsc::{self, Sender},
};
use tokio_stream::wrappers::ReceiverStream;
use tonic::{service::interceptor::InterceptedService, transport::Channel, Request, Status, Streaming};
use tracing::info;
use tokio::{io::AsyncWrite, sync::mpsc::Sender};
use tracing::warn;
use uuid::Uuid;
@@ -206,6 +187,13 @@ impl DiskAPI for Disk {
}
}
async fn read_file_stream(&self, volume: &str, path: &str, offset: usize, length: usize) -> Result<FileReader> {
match self {
Disk::Local(local_disk) => local_disk.read_file_stream(volume, path, offset, length).await,
Disk::Remote(remote_disk) => remote_disk.read_file_stream(volume, path, offset, length).await,
}
}
async fn list_dir(&self, _origvolume: &str, volume: &str, _dir_path: &str, _count: i32) -> Result<Vec<String>> {
match self {
Disk::Local(local_disk) => local_disk.list_dir(_origvolume, volume, _dir_path, _count).await,
@@ -336,7 +324,6 @@ impl DiskAPI for Disk {
}
async fn delete_volume(&self, volume: &str) -> Result<()> {
info!("delete_volume, volume: {}", volume);
match self {
Disk::Local(local_disk) => local_disk.delete_volume(volume).await,
Disk::Remote(remote_disk) => remote_disk.delete_volume(volume).await,
@@ -357,7 +344,6 @@ impl DiskAPI for Disk {
scan_mode: HealScanMode,
we_sleep: ShouldSleepFn,
) -> Result<DataUsageCache> {
info!("ns_scanner");
match self {
Disk::Local(local_disk) => local_disk.ns_scanner(cache, updates, scan_mode, we_sleep).await,
Disk::Remote(remote_disk) => remote_disk.ns_scanner(cache, updates, scan_mode, we_sleep).await,
@@ -451,6 +437,7 @@ pub trait DiskAPI: Debug + Send + Sync + 'static {
// 读目录下的所有文件、目录
async fn list_dir(&self, origvolume: &str, volume: &str, dir_path: &str, count: i32) -> Result<Vec<String>>;
async fn read_file(&self, volume: &str, path: &str) -> Result<FileReader>;
async fn read_file_stream(&self, volume: &str, path: &str, offset: usize, length: usize) -> Result<FileReader>;
async fn append_file(&self, volume: &str, path: &str) -> Result<FileWriter>;
async fn create_file(&self, origvolume: &str, volume: &str, path: &str, file_size: usize) -> Result<FileWriter>;
// ReadFileStream
@@ -1188,20 +1175,6 @@ pub struct ReadMultipleResp {
pub mod_time: Option<OffsetDateTime>,
}
// impl Default for ReadMultipleResp {
// fn default() -> Self {
// Self {
// bucket: String::new(),
// prefix: String::new(),
// file: String::new(),
// exists: false,
// error: String::new(),
// data: Vec::new(),
// mod_time: OffsetDateTime::UNIX_EPOCH,
// }
// }
// }
#[derive(Debug, Deserialize, Serialize)]
pub struct VolumeInfo {
pub name: String,
@@ -1214,383 +1187,3 @@ pub struct ReadOptions {
pub read_data: bool,
pub healing: bool,
}
// pub struct FileWriter {
// pub inner: Pin<Box<dyn AsyncWrite + Send + Sync + 'static>>,
// }
// impl AsyncWrite for FileWriter {
// fn poll_write(
// mut self: Pin<&mut Self>,
// cx: &mut std::task::Context<'_>,
// buf: &[u8],
// ) -> std::task::Poll<std::result::Result<usize, std::io::Error>> {
// Pin::new(&mut self.inner).poll_write(cx, buf)
// }
// fn poll_flush(
// mut self: Pin<&mut Self>,
// cx: &mut std::task::Context<'_>,
// ) -> std::task::Poll<std::result::Result<(), std::io::Error>> {
// Pin::new(&mut self.inner).poll_flush(cx)
// }
// fn poll_shutdown(
// mut self: Pin<&mut Self>,
// cx: &mut std::task::Context<'_>,
// ) -> std::task::Poll<std::result::Result<(), std::io::Error>> {
// Pin::new(&mut self.inner).poll_shutdown(cx)
// }
// }
// impl FileWriter {
// pub fn new<W>(inner: W) -> Self
// where
// W: AsyncWrite + Send + Sync + 'static,
// {
// Self { inner: Box::pin(inner) }
// }
// }
// Unified writer handle over the three backing stores: a local file, a
// remote node reached over gRPC, or an in-memory buffer.
#[derive(Debug)]
pub enum FileWriter {
Local(LocalFileWriter),
Remote(RemoteFileWriter),
Buffer(BufferWriter),
}
#[async_trait::async_trait]
impl Writer for FileWriter {
fn as_any(&self) -> &dyn Any {
self
}
async fn write(&mut self, buf: &[u8]) -> Result<()> {
match self {
Self::Local(writer) => writer.write(buf).await,
Self::Remote(writter) => writter.write(buf).await,
Self::Buffer(writer) => writer.write(buf).await,
}
}
}
/// In-memory writer backend: bytes are accumulated in a plain `Vec<u8>`.
#[derive(Debug)]
pub struct BufferWriter {
    pub inner: Vec<u8>,
}

impl BufferWriter {
    /// Wrap an existing byte buffer (may already contain data).
    pub fn new(inner: Vec<u8>) -> Self {
        BufferWriter { inner }
    }

    /// Borrow the accumulated bytes as a slice.
    #[allow(clippy::should_implement_trait)]
    pub fn as_ref(&self) -> &[u8] {
        &self.inner
    }
}
#[async_trait::async_trait]
impl Writer for BufferWriter {
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Append `buf` to the in-memory buffer.
    ///
    /// Fix: the previous body went through `AsyncWriteExt::write`, whose
    /// contract allows a short write, and discarded the returned length
    /// (`let _ = ... .write(buf)`); appending directly is infallible, cannot
    /// drop bytes, and needs no async machinery or no-op `flush`.
    async fn write(&mut self, buf: &[u8]) -> Result<()> {
        self.inner.extend_from_slice(buf);
        Ok(())
    }
}
/// Writer over a local async file handle.
#[derive(Debug)]
pub struct LocalFileWriter {
    pub inner: File,
}

impl LocalFileWriter {
    /// Wrap an already-opened file handle.
    pub fn new(inner: File) -> Self {
        Self { inner }
    }
}
#[async_trait::async_trait]
impl Writer for LocalFileWriter {
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Write `buf` to the underlying file and flush.
    ///
    /// Uses `write_all` instead of `write`: on a real file descriptor a
    /// single `write` may be short, so the previous code could silently
    /// persist only a prefix of `buf`.
    async fn write(&mut self, buf: &[u8]) -> Result<()> {
        self.inner.write_all(buf).await?;
        self.inner.flush().await?;
        Ok(())
    }
}
// Node-service gRPC client whose channel carries a boxed per-request
// interceptor (the interceptor itself is supplied by the caller that
// constructs the client).
type NodeClient = NodeServiceClient<
    InterceptedService<Channel, Box<dyn Fn(Request<()>) -> Result<Request<()>, Status> + Send + Sync + 'static>>,
>;
/// Streaming writer that ships data chunks to a remote disk via the node
/// service `write_stream` RPC — one request and one acknowledgement per
/// `write` call.
#[derive(Debug)]
pub struct RemoteFileWriter {
    // Remote disk endpoint; serialized into each request's `disk` field.
    pub endpoint: Endpoint,
    // Target volume on the remote disk.
    pub volume: String,
    // Target file path within the volume.
    pub path: String,
    // Forwarded on every request; presumably selects append vs. truncate on
    // the server side — confirm against the node-service handler.
    pub is_append: bool,
    // Sender feeding the client-side request stream.
    tx: Sender<WriteRequest>,
    // Server responses: one acknowledgement per written chunk.
    resp_stream: Streaming<WriteResponse>,
}
impl RemoteFileWriter {
    /// Open the `write_stream` RPC against `client` and return a writer that
    /// feeds it chunk by chunk.
    ///
    /// # Errors
    /// Returns an error if establishing the streaming RPC fails (previously
    /// this `unwrap`ped and panicked on any transport/RPC failure).
    pub async fn new(endpoint: Endpoint, volume: String, path: String, is_append: bool, mut client: NodeClient) -> Result<Self> {
        let (tx, rx) = mpsc::channel(128);
        let in_stream = ReceiverStream::new(rx);
        // Propagate RPC setup failures via `?` (Status converts into Error,
        // as already relied on by `write`'s `resp?`).
        let response = client.write_stream(in_stream).await?;
        let resp_stream = response.into_inner();

        Ok(Self {
            endpoint,
            volume,
            path,
            is_append,
            tx,
            resp_stream,
        })
    }
}
#[async_trait::async_trait]
impl Writer for RemoteFileWriter {
fn as_any(&self) -> &dyn Any {
self
}
async fn write(&mut self, buf: &[u8]) -> Result<()> {
let request = WriteRequest {
disk: self.endpoint.to_string(),
volume: self.volume.to_string(),
path: self.path.to_string(),
is_append: self.is_append,
data: buf.to_vec(),
};
self.tx.send(request).await?;
if let Some(resp) = self.resp_stream.next().await {
// match resp {
// Ok(resp) => {
// if resp.success {
// info!("write stream success");
// } else {
// info!("write stream failed: {}", resp.error_info.unwrap_or("".to_string()));
// }
// }
// Err(_err) => {
// }
// }
let resp = resp?;
if resp.success {
info!("write stream success");
} else {
return if let Some(err) = &resp.error {
Err(proto_err_to_err(err))
} else {
Err(Error::from_string(""))
};
}
} else {
let error_info = "can not get response";
info!("write stream failed: {}", error_info);
return Err(Error::from_string(error_info));
}
Ok(())
}
}
/// Positioned, async read operations over a file-like source.
#[async_trait::async_trait]
pub trait Reader {
    /// Seek to `offset` and fill `buf` completely; returns the bytes read.
    async fn read_at(&mut self, offset: usize, buf: &mut [u8]) -> Result<usize>;
    /// Move the read position to the absolute byte `offset`.
    async fn seek(&mut self, offset: usize) -> Result<()>;
    /// Fill `buf` completely from the current position; returns the bytes read.
    async fn read_exact(&mut self, buf: &mut [u8]) -> Result<usize>;
}
/// A file reader backed by one of three sources: a local file, a remote
/// disk (via the node-service RPC stream), or an in-memory byte buffer.
#[derive(Debug)]
pub enum FileReader {
    Local(LocalFileReader),
    Remote(RemoteFileReader),
    Buffer(BufferReader),
}
#[async_trait::async_trait]
impl Reader for FileReader {
async fn read_at(&mut self, offset: usize, buf: &mut [u8]) -> Result<usize> {
match self {
Self::Local(reader) => reader.read_at(offset, buf).await,
Self::Remote(reader) => reader.read_at(offset, buf).await,
Self::Buffer(reader) => reader.read_at(offset, buf).await,
}
}
async fn seek(&mut self, offset: usize) -> Result<()> {
match self {
Self::Local(reader) => reader.seek(offset).await,
Self::Remote(reader) => reader.seek(offset).await,
Self::Buffer(reader) => reader.seek(offset).await,
}
}
async fn read_exact(&mut self, buf: &mut [u8]) -> Result<usize> {
match self {
Self::Local(reader) => reader.read_exact(buf).await,
Self::Remote(reader) => reader.read_exact(buf).await,
Self::Buffer(reader) => reader.read_exact(buf).await,
}
}
}
/// In-memory reader over a byte buffer that caches the current position to
/// skip redundant cursor moves.
#[derive(Debug)]
pub struct BufferReader {
    pub inner: Cursor<Vec<u8>>,
    pos: usize,
}

impl BufferReader {
    /// Wrap `inner` in a cursor positioned at the start of the buffer.
    pub fn new(inner: Vec<u8>) -> Self {
        BufferReader {
            pos: 0,
            inner: Cursor::new(inner),
        }
    }
}
#[async_trait::async_trait]
impl Reader for BufferReader {
    /// Read exactly `buf.len()` bytes starting at `offset`.
    #[tracing::instrument(level = "debug", skip(self, buf))]
    async fn read_at(&mut self, offset: usize, buf: &mut [u8]) -> Result<usize> {
        self.seek(offset).await?;
        self.read_exact(buf).await
    }

    /// Seek to `offset`, skipping the cursor move when already positioned there.
    #[tracing::instrument(level = "debug", skip(self))]
    async fn seek(&mut self, offset: usize) -> Result<()> {
        if self.pos != offset {
            self.inner.set_position(offset as u64);
            // BUG FIX: keep the cached position in sync with the cursor.
            // Without this, `pos` stayed stale, so a later seek back to the
            // stale value was skipped even though the cursor had moved.
            // (LocalFileReader::seek already updates `pos` the same way.)
            self.pos = offset;
        }
        Ok(())
    }

    /// Fill `buf` completely and advance the cached position.
    // `buf` skipped from the span for consistency with LocalFileReader
    // (avoids formatting the whole data buffer into trace output).
    #[tracing::instrument(level = "debug", skip(self, buf))]
    async fn read_exact(&mut self, buf: &mut [u8]) -> Result<usize> {
        let bytes_read = self.inner.read_exact(buf).await?;
        self.pos += buf.len();
        Ok(bytes_read)
    }
}
/// Reader over a local async file that caches the current byte position to
/// avoid redundant seek syscalls.
#[derive(Debug)]
pub struct LocalFileReader {
    pub inner: File,
    // Cached absolute read position; kept in sync by `seek`/`read_exact`.
    pos: usize,
}

impl LocalFileReader {
    /// Wrap an already-opened file, starting at position 0.
    pub fn new(inner: File) -> Self {
        Self { inner, pos: 0 }
    }
}
#[async_trait::async_trait]
impl Reader for LocalFileReader {
    /// Read exactly `buf.len()` bytes starting at `offset`.
    #[tracing::instrument(level = "debug", skip(self, buf))]
    async fn read_at(&mut self, offset: usize, buf: &mut [u8]) -> Result<usize> {
        self.seek(offset).await?;
        self.read_exact(buf).await
    }

    /// Seek to `offset`, skipping the syscall when already positioned there.
    #[tracing::instrument(level = "debug", skip(self))]
    async fn seek(&mut self, offset: usize) -> Result<()> {
        if self.pos != offset {
            self.inner.seek(SeekFrom::Start(offset as u64)).await?;
            // Keep the cached position in sync with the file cursor.
            self.pos = offset;
        }
        Ok(())
    }

    /// Fill `buf` completely and advance the cached position.
    #[tracing::instrument(level = "debug", skip(self, buf))]
    async fn read_exact(&mut self, buf: &mut [u8]) -> Result<usize> {
        let bytes_read = self.inner.read_exact(buf).await?;
        self.pos += buf.len();
        Ok(bytes_read)
    }
}
/// Streaming reader that fetches byte ranges from a remote disk via the node
/// service `read_at` RPC — one request and one response per `read_at` call.
#[derive(Debug)]
pub struct RemoteFileReader {
    // Remote disk endpoint; serialized into each request's `disk` field.
    pub endpoint: Endpoint,
    // Source volume on the remote disk.
    pub volume: String,
    // Source file path within the volume.
    pub path: String,
    // Sender feeding the client-side request stream.
    tx: Sender<ReadAtRequest>,
    // Server responses: one per issued read request.
    resp_stream: Streaming<ReadAtResponse>,
}
impl RemoteFileReader {
    /// Open the `read_at` streaming RPC against `client` and return a reader
    /// that issues one positioned read per request.
    ///
    /// # Errors
    /// Returns an error if establishing the streaming RPC fails (previously
    /// this `unwrap`ped and panicked on any transport/RPC failure).
    pub async fn new(endpoint: Endpoint, volume: String, path: String, mut client: NodeClient) -> Result<Self> {
        let (tx, rx) = mpsc::channel(128);
        let in_stream = ReceiverStream::new(rx);
        // Propagate RPC setup failures via `?` (Status converts into Error,
        // as already relied on by `read_at`'s `resp?`).
        let response = client.read_at(in_stream).await?;
        let resp_stream = response.into_inner();

        Ok(Self {
            endpoint,
            volume,
            path,
            tx,
            resp_stream,
        })
    }
}
#[async_trait::async_trait]
impl Reader for RemoteFileReader {
async fn read_at(&mut self, offset: usize, buf: &mut [u8]) -> Result<usize> {
let request = ReadAtRequest {
disk: self.endpoint.to_string(),
volume: self.volume.to_string(),
path: self.path.to_string(),
offset: offset.try_into().unwrap(),
// length: length.try_into().unwrap(),
length: buf.len().try_into().unwrap(),
};
self.tx.send(request).await?;
if let Some(resp) = self.resp_stream.next().await {
let resp = resp?;
if resp.success {
info!("read at stream success");
buf.copy_from_slice(&resp.data);
Ok(resp.read_size.try_into().unwrap())
} else {
return if let Some(err) = &resp.error {
Err(proto_err_to_err(err))
} else {
Err(Error::from_string(""))
};
}
} else {
let error_info = "can not get response";
info!("read at stream failed: {}", error_info);
Err(Error::from_string(error_info))
}
}
async fn seek(&mut self, _offset: usize) -> Result<()> {
unimplemented!()
}
async fn read_exact(&mut self, _buf: &mut [u8]) -> Result<usize> {
unimplemented!()
}
}

View File

@@ -141,13 +141,15 @@ pub async fn reliable_rename(
}
// need remove dst path
if let Err(err) = utils::fs::remove_all(dst_file_path.as_ref()).await {
info!(
"reliable_rename rm dst failed. src_file_path: {:?}, dst_file_path: {:?}, base_dir: {:?}, err: {:?}",
src_file_path.as_ref(),
dst_file_path.as_ref(),
base_dir.as_ref(),
err
);
if err.kind() != io::ErrorKind::NotFound {
info!(
"reliable_rename rm dst failed. src_file_path: {:?}, dst_file_path: {:?}, base_dir: {:?}, err: {:?}",
src_file_path.as_ref(),
dst_file_path.as_ref(),
base_dir.as_ref(),
err
);
}
}
let mut i = 0;
loop {

View File

@@ -23,10 +23,9 @@ use uuid::Uuid;
use super::{
endpoint::Endpoint, CheckPartsResp, DeleteOptions, DiskAPI, DiskInfo, DiskInfoOptions, DiskLocation, DiskOption,
FileInfoVersions, FileReader, FileWriter, ReadMultipleReq, ReadMultipleResp, ReadOptions, RemoteFileReader, RemoteFileWriter,
RenameDataResp, UpdateMetadataOpts, VolumeInfo, WalkDirOptions,
FileInfoVersions, ReadMultipleReq, ReadMultipleResp, ReadOptions, RenameDataResp, UpdateMetadataOpts, VolumeInfo,
WalkDirOptions,
};
use crate::utils::proto_err_to_err;
use crate::{
disk::error::DiskError,
error::{Error, Result},
@@ -38,6 +37,10 @@ use crate::{
store_api::{FileInfo, RawFileInfo},
};
use crate::{disk::MetaCacheEntry, metacache::writer::MetacacheWriter};
use crate::{
io::{FileReader, FileWriter, HttpFileReader, HttpFileWriter},
utils::proto_err_to_err,
};
use protos::proto_gen::node_service::RenamePartRequst;
#[derive(Debug)]
@@ -132,7 +135,7 @@ impl DiskAPI for RemoteDisk {
}
async fn read_all(&self, volume: &str, path: &str) -> Result<Vec<u8>> {
info!("read_all");
info!("read_all {}/{}", volume, path);
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::from_string(format!("can not get client, err: {}", err)))?;
@@ -144,8 +147,6 @@ impl DiskAPI for RemoteDisk {
let response = client.read_all(request).await?.into_inner();
info!("read_all success");
if !response.success {
return Err(Error::new(DiskError::FileNotFound));
}
@@ -179,7 +180,7 @@ impl DiskAPI for RemoteDisk {
}
async fn delete(&self, volume: &str, path: &str, opt: DeleteOptions) -> Result<()> {
info!("delete");
info!("delete {}/{}/{}", self.endpoint.to_string(), volume, path);
let options = serde_json::to_string(&opt)?;
let mut client = node_service_time_out_client(&self.addr)
.await
@@ -261,7 +262,7 @@ impl DiskAPI for RemoteDisk {
}
async fn rename_part(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str, meta: Vec<u8>) -> Result<()> {
info!("rename_part");
info!("rename_part {}/{}", src_volume, src_path);
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::from_string(format!("can not get client, err: {}", err)))?;
@@ -286,6 +287,7 @@ impl DiskAPI for RemoteDisk {
Ok(())
}
#[tracing::instrument(level = "debug", skip(self))]
async fn rename_file(&self, src_volume: &str, src_path: &str, dst_volume: &str, dst_path: &str) -> Result<()> {
info!("rename_file");
let mut client = node_service_time_out_client(&self.addr)
@@ -312,55 +314,59 @@ impl DiskAPI for RemoteDisk {
Ok(())
}
async fn create_file(&self, _origvolume: &str, volume: &str, path: &str, _file_size: usize) -> Result<FileWriter> {
info!("create_file");
Ok(FileWriter::Remote(
RemoteFileWriter::new(
self.endpoint.clone(),
volume.to_string(),
path.to_string(),
false,
node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::from_string(format!("can not get client, err: {}", err)))?,
)
.await?,
))
#[tracing::instrument(level = "debug", skip(self))]
async fn create_file(&self, _origvolume: &str, volume: &str, path: &str, file_size: usize) -> Result<FileWriter> {
info!("create_file {}/{}/{}", self.endpoint.to_string(), volume, path);
Ok(Box::new(HttpFileWriter::new(
self.endpoint.grid_host().as_str(),
self.endpoint.to_string().as_str(),
volume,
path,
file_size,
false,
)?))
}
#[tracing::instrument(level = "debug", skip(self))]
async fn append_file(&self, volume: &str, path: &str) -> Result<FileWriter> {
info!("append_file");
Ok(FileWriter::Remote(
RemoteFileWriter::new(
self.endpoint.clone(),
volume.to_string(),
path.to_string(),
true,
node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::from_string(format!("can not get client, err: {}", err)))?,
)
.await?,
info!("append_file {}/{}", volume, path);
Ok(Box::new(HttpFileWriter::new(
self.endpoint.grid_host().as_str(),
self.endpoint.to_string().as_str(),
volume,
path,
0,
true,
)?))
}
#[tracing::instrument(level = "debug", skip(self))]
async fn read_file(&self, volume: &str, path: &str) -> Result<FileReader> {
info!("read_file {}/{}", volume, path);
Ok(Box::new(
HttpFileReader::new(self.endpoint.grid_host().as_str(), self.endpoint.to_string().as_str(), volume, path, 0, 0)
.await?,
))
}
async fn read_file(&self, volume: &str, path: &str) -> Result<FileReader> {
info!("read_file");
Ok(FileReader::Remote(
RemoteFileReader::new(
self.endpoint.clone(),
volume.to_string(),
path.to_string(),
node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::from_string(format!("can not get client, err: {}", err)))?,
#[tracing::instrument(level = "debug", skip(self))]
async fn read_file_stream(&self, volume: &str, path: &str, offset: usize, length: usize) -> Result<FileReader> {
info!("read_file_stream {}/{}/{}", self.endpoint.to_string(), volume, path);
Ok(Box::new(
HttpFileReader::new(
self.endpoint.grid_host().as_str(),
self.endpoint.to_string().as_str(),
volume,
path,
offset,
length,
)
.await?,
))
}
async fn list_dir(&self, _origvolume: &str, volume: &str, _dir_path: &str, _count: i32) -> Result<Vec<String>> {
info!("list_dir");
info!("list_dir {}/{}", volume, _dir_path);
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::from_string(format!("can not get client, err: {}", err)))?;
@@ -384,7 +390,8 @@ impl DiskAPI for RemoteDisk {
// FIXME: TODO: use writer
async fn walk_dir<W: AsyncWrite + Unpin + Send>(&self, opts: WalkDirOptions, wr: &mut W) -> Result<()> {
info!("walk_dir");
let now = std::time::SystemTime::now();
info!("walk_dir {}/{}/{:?}", self.endpoint.to_string(), opts.bucket, opts.filter_prefix);
let mut wr = wr;
let mut out = MetacacheWriter::new(&mut wr);
let mut buf = Vec::new();
@@ -413,6 +420,12 @@ impl DiskAPI for RemoteDisk {
}
}
info!(
"walk_dir {}/{:?} done {:?}",
opts.bucket,
opts.filter_prefix,
now.elapsed().unwrap_or_default()
);
Ok(())
}
@@ -424,7 +437,7 @@ impl DiskAPI for RemoteDisk {
dst_volume: &str,
dst_path: &str,
) -> Result<RenameDataResp> {
info!("rename_data");
info!("rename_data {}/{}/{}/{}", self.addr, self.endpoint.to_string(), dst_volume, dst_path);
let file_info = serde_json::to_string(&fi)?;
let mut client = node_service_time_out_client(&self.addr)
.await
@@ -606,7 +619,7 @@ impl DiskAPI for RemoteDisk {
}
async fn write_metadata(&self, _org_volume: &str, volume: &str, path: &str, fi: FileInfo) -> Result<()> {
info!("write_metadata");
info!("write_metadata {}/{}", volume, path);
let file_info = serde_json::to_string(&fi)?;
let mut client = node_service_time_out_client(&self.addr)
.await
@@ -668,7 +681,7 @@ impl DiskAPI for RemoteDisk {
}
async fn read_xl(&self, volume: &str, path: &str, read_data: bool) -> Result<RawFileInfo> {
info!("read_xl");
info!("read_xl {}/{}/{}", self.endpoint.to_string(), volume, path);
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::from_string(format!("can not get client, err: {}", err)))?;
@@ -777,7 +790,7 @@ impl DiskAPI for RemoteDisk {
}
async fn read_multiple(&self, req: ReadMultipleReq) -> Result<Vec<ReadMultipleResp>> {
info!("read_multiple");
info!("read_multiple {}/{}/{}", self.endpoint.to_string(), req.bucket, req.prefix);
let read_multiple_req = serde_json::to_string(&req)?;
let mut client = node_service_time_out_client(&self.addr)
.await
@@ -807,7 +820,7 @@ impl DiskAPI for RemoteDisk {
}
async fn delete_volume(&self, volume: &str) -> Result<()> {
info!("delete_volume");
info!("delete_volume {}/{}", self.endpoint.to_string(), volume);
let mut client = node_service_time_out_client(&self.addr)
.await
.map_err(|err| Error::from_string(format!("can not get client, err: {}", err)))?;
@@ -830,7 +843,6 @@ impl DiskAPI for RemoteDisk {
}
async fn disk_info(&self, opts: &DiskInfoOptions) -> Result<DiskInfo> {
info!("delete_volume");
let opts = serde_json::to_string(&opts)?;
let mut client = node_service_time_out_client(&self.addr)
.await

View File

@@ -1,14 +1,11 @@
use crate::bitrot::{BitrotReader, BitrotWriter};
use crate::error::{Error, Result, StdError};
use crate::error::{Error, Result};
use crate::quorum::{object_op_ignored_errs, reduce_write_quorum_errs};
use bytes::Bytes;
use futures::future::join_all;
use futures::{pin_mut, Stream, StreamExt};
use reed_solomon_erasure::galois_8::ReedSolomon;
use std::any::Any;
use std::fmt::Debug;
use std::io::ErrorKind;
use tokio::io::DuplexStream;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tracing::warn;
use tracing::{error, info};
@@ -50,22 +47,22 @@ impl Erasure {
}
}
#[tracing::instrument(level = "debug", skip(self, body, writers))]
#[tracing::instrument(level = "debug", skip(self, reader, writers))]
pub async fn encode<S>(
&mut self,
body: S,
reader: &mut S,
writers: &mut [Option<BitrotWriter>],
// block_size: usize,
total_size: usize,
write_quorum: usize,
) -> Result<usize>
where
S: Stream<Item = Result<Bytes, StdError>> + Send + Sync,
S: AsyncRead + Unpin + Send + 'static,
{
pin_mut!(body);
let mut reader = tokio_util::io::StreamReader::new(
body.map(|f| f.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))),
);
// pin_mut!(body);
// let mut reader = tokio_util::io::StreamReader::new(
// body.map(|f| f.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))),
// );
let mut total: usize = 0;
@@ -102,6 +99,7 @@ impl Erasure {
let blocks = self.encode_data(&self.buf)?;
let mut errs = Vec::new();
// TODO: 并发写入
for (i, w_op) in writers.iter_mut().enumerate() {
if let Some(w) = w_op {
match w.write(blocks[i].as_ref()).await {
@@ -205,14 +203,17 @@ impl Erasure {
// Ok(total)
}
pub async fn decode(
pub async fn decode<W>(
&self,
writer: &mut DuplexStream,
writer: &mut W,
readers: Vec<Option<BitrotReader>>,
offset: usize,
length: usize,
total_length: usize,
) -> (usize, Option<Error>) {
) -> (usize, Option<Error>)
where
W: AsyncWriteExt + Send + Unpin + 'static,
{
if length == 0 {
return (0, None);
}
@@ -282,14 +283,17 @@ impl Erasure {
(bytes_writed, None)
}
async fn write_data_blocks(
async fn write_data_blocks<W>(
&self,
writer: &mut DuplexStream,
writer: &mut W,
bufs: Vec<Option<Vec<u8>>>,
data_blocks: usize,
offset: usize,
length: usize,
) -> Result<usize> {
) -> Result<usize>
where
W: AsyncWrite + Send + Unpin + 'static,
{
if bufs.len() < data_blocks {
return Err(Error::msg("read bufs not match data_blocks"));
}
@@ -419,6 +423,7 @@ impl Erasure {
// num_shards * self.shard_size(self.block_size)
}
// where erasure reading begins.
pub fn shard_file_offset(&self, start_offset: usize, length: usize, total_length: usize) -> usize {
let shard_size = self.shard_size(self.block_size);
let shard_file_size = self.shard_file_size(total_length);
@@ -499,11 +504,10 @@ pub trait Writer {
}
#[async_trait::async_trait]
pub trait ReadAt: Debug {
pub trait ReadAt {
async fn read_at(&mut self, offset: usize, length: usize) -> Result<(Vec<u8>, usize)>;
}
#[derive(Debug)]
pub struct ShardReader {
readers: Vec<Option<BitrotReader>>, // 磁盘
data_block_count: usize, // 总的分片数量
@@ -528,6 +532,7 @@ impl ShardReader {
pub async fn read(&mut self) -> Result<Vec<Option<Vec<u8>>>> {
// let mut disks = self.readers;
let reader_length = self.readers.len();
// 需要读取的块长度
let mut read_length = self.shard_size;
if self.offset + read_length > self.shard_file_size {
read_length = self.shard_file_size - self.offset

View File

@@ -217,7 +217,7 @@ async fn run_data_scanner() {
globalScannerMetrics.write().await.set_cycle(Some(cycle_info.clone())).await;
let mut wr = Vec::new();
cycle_info.serialize(&mut Serializer::new(&mut wr)).unwrap();
let _ = save_config(store.clone(), &DATA_USAGE_BLOOM_NAME_PATH, &wr).await;
let _ = save_config(store.clone(), &DATA_USAGE_BLOOM_NAME_PATH, wr).await;
}
Err(err) => {
info!("ns_scanner failed: {:?}", err);
@@ -268,7 +268,7 @@ async fn save_background_heal_info(store: Arc<ECStore>, info: &BackgroundHealInf
Ok(info) => info,
Err(_) => return,
};
let _ = save_config(store, &BACKGROUND_HEAL_INFO_PATH, &b).await;
let _ = save_config(store, &BACKGROUND_HEAL_INFO_PATH, b).await;
}
async fn get_cycle_scan_mode(current_cycle: u64, bitrot_start_cycle: u64, bitrot_start_time: SystemTime) -> HealScanMode {

View File

@@ -124,10 +124,11 @@ pub async fn store_data_usage_in_backend(mut rx: Receiver<DataUsageInfo>) {
Some(data_usage_info) => {
if let Ok(data) = serde_json::to_vec(&data_usage_info) {
if attempts > 10 {
let _ = save_config(store.clone(), &format!("{}{}", *DATA_USAGE_OBJ_NAME_PATH, ".bkp"), &data).await;
let _ =
save_config(store.clone(), &format!("{}{}", *DATA_USAGE_OBJ_NAME_PATH, ".bkp"), data.clone()).await;
attempts += 1;
}
let _ = save_config(store.clone(), &DATA_USAGE_OBJ_NAME_PATH, &data).await;
let _ = save_config(store.clone(), &DATA_USAGE_OBJ_NAME_PATH, data).await;
attempts += 1;
} else {
continue;

View File

@@ -458,9 +458,9 @@ impl DataUsageCache {
let name_clone = name.clone();
tokio::spawn(async move {
let _ = save_config(store_clone, &format!("{}{}", &name_clone, ".bkp"), &buf_clone).await;
let _ = save_config(store_clone, &format!("{}{}", &name_clone, ".bkp"), buf_clone).await;
});
save_config(store, &name, &buf).await
save_config(store, &name, buf).await
}
pub fn replace(&mut self, path: &str, parent: &str, e: DataUsageEntry) {

View File

@@ -1,226 +1,153 @@
use std::io::Read;
use std::io::Write;
use futures::TryStreamExt;
use md5::Digest;
use md5::Md5;
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::fs::File;
use tokio::io::{self, AsyncRead, AsyncWrite, ReadBuf};
use std::task::Context;
use std::task::Poll;
use tokio::io::AsyncRead;
use tokio::io::AsyncWrite;
use tokio::io::ReadBuf;
use tokio::sync::oneshot;
use tokio_util::io::ReaderStream;
use tokio_util::io::StreamReader;
use tracing::error;
use tracing::warn;
pub enum Reader {
File(File),
Buffer(VecAsyncReader),
pub type FileReader = Box<dyn AsyncRead + Send + Sync + Unpin>;
pub type FileWriter = Box<dyn AsyncWrite + Send + Sync + Unpin>;
pub const READ_BUFFER_SIZE: usize = 1024 * 1024;
#[derive(Debug)]
pub struct HttpFileWriter {
wd: tokio::io::DuplexStream,
err_rx: oneshot::Receiver<std::io::Error>,
}
impl AsyncRead for Reader {
fn poll_read(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<io::Result<()>> {
match self.get_mut() {
Reader::File(file) => Pin::new(file).poll_read(cx, buf),
Reader::Buffer(buffer) => Pin::new(buffer).poll_read(cx, buf),
}
impl HttpFileWriter {
pub fn new(url: &str, disk: &str, volume: &str, path: &str, size: usize, append: bool) -> std::io::Result<Self> {
let (rd, wd) = tokio::io::duplex(READ_BUFFER_SIZE);
let (err_tx, err_rx) = oneshot::channel::<std::io::Error>();
let body = reqwest::Body::wrap_stream(ReaderStream::with_capacity(rd, READ_BUFFER_SIZE));
let url = url.to_owned();
let disk = disk.to_owned();
let volume = volume.to_owned();
let path = path.to_owned();
tokio::spawn(async move {
let client = reqwest::Client::new();
if let Err(err) = client
.put(format!(
"{}/rustfs/rpc/put_file_stream?disk={}&volume={}&path={}&append={}&size={}",
url,
urlencoding::encode(&disk),
urlencoding::encode(&volume),
urlencoding::encode(&path),
append,
size
))
.body(body)
.send()
.await
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
{
error!("HttpFileWriter put file err: {:?}", err);
if let Err(er) = err_tx.send(err) {
error!("HttpFileWriter tx.send err: {:?}", er);
}
}
});
Ok(Self { wd, err_rx })
}
}
#[derive(Default)]
pub enum Writer {
#[default]
NotUse,
File(File),
Buffer(VecAsyncWriter),
}
impl AsyncWrite for Writer {
fn poll_write(self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &[u8]) -> Poll<io::Result<usize>> {
match self.get_mut() {
Writer::File(file) => Pin::new(file).poll_write(cx, buf),
Writer::Buffer(buff) => Pin::new(buff).poll_write(cx, buf),
Writer::NotUse => Poll::Ready(Ok(0)),
impl AsyncWrite for HttpFileWriter {
#[tracing::instrument(level = "debug", skip(self, buf))]
fn poll_write(
mut self: Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
buf: &[u8],
) -> Poll<std::result::Result<usize, std::io::Error>> {
if let Ok(err) = self.as_mut().err_rx.try_recv() {
return Poll::Ready(Err(err));
}
Pin::new(&mut self.wd).poll_write(cx, buf)
}
fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
match self.get_mut() {
Writer::File(file) => Pin::new(file).poll_flush(cx),
Writer::Buffer(buff) => Pin::new(buff).poll_flush(cx),
Writer::NotUse => Poll::Ready(Ok(())),
}
#[tracing::instrument(level = "debug", skip(self))]
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<std::result::Result<(), std::io::Error>> {
Pin::new(&mut self.wd).poll_flush(cx)
}
fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
match self.get_mut() {
Writer::File(file) => Pin::new(file).poll_shutdown(cx),
Writer::Buffer(buff) => Pin::new(buff).poll_shutdown(cx),
Writer::NotUse => Poll::Ready(Ok(())),
}
#[tracing::instrument(level = "debug", skip(self))]
fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<std::result::Result<(), std::io::Error>> {
Pin::new(&mut self.wd).poll_shutdown(cx)
}
}
pub struct AsyncToSync<R> {
pub struct HttpFileReader {
inner: FileReader,
}
impl HttpFileReader {
pub async fn new(url: &str, disk: &str, volume: &str, path: &str, offset: usize, length: usize) -> std::io::Result<Self> {
let resp = reqwest::Client::new()
.get(format!(
"{}/rustfs/rpc/read_file_stream?disk={}&volume={}&path={}&offset={}&length={}",
url,
urlencoding::encode(disk),
urlencoding::encode(volume),
urlencoding::encode(path),
offset,
length
))
.send()
.await
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
let inner = Box::new(StreamReader::new(resp.bytes_stream().map_err(std::io::Error::other)));
Ok(Self { inner })
}
}
impl AsyncRead for HttpFileReader {
fn poll_read(mut self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<tokio::io::Result<()>> {
Pin::new(&mut self.inner).poll_read(cx, buf)
}
}
pub struct EtagReader<R> {
inner: R,
md5: Md5,
}
impl<R: AsyncRead + Unpin> AsyncToSync<R> {
pub fn new_reader(inner: R) -> Self {
Self { inner }
impl<R> EtagReader<R> {
pub fn new(inner: R) -> Self {
EtagReader { inner, md5: Md5::new() }
}
fn read_async(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll<std::io::Result<usize>> {
let mut read_buf = ReadBuf::new(buf);
// Poll the underlying AsyncRead to fill the ReadBuf
match Pin::new(&mut self.inner).poll_read(cx, &mut read_buf) {
Poll::Ready(Ok(())) => Poll::Ready(Ok(read_buf.filled().len())),
Poll::Ready(Err(e)) => Poll::Ready(Err(e)),
Poll::Pending => Poll::Pending,
}
pub fn etag(self) -> String {
hex_simd::encode_to_string(self.md5.finalize(), hex_simd::AsciiCase::Lower)
}
}
impl<R: AsyncWrite + Unpin> AsyncToSync<R> {
pub fn new_writer(inner: R) -> Self {
Self { inner }
}
// This function will perform a write using AsyncWrite
fn write_async(&mut self, cx: &mut Context<'_>, buf: &[u8]) -> Poll<std::io::Result<usize>> {
let result = Pin::new(&mut self.inner).poll_write(cx, buf);
match result {
Poll::Ready(Ok(n)) => Poll::Ready(Ok(n)),
Poll::Ready(Err(e)) => Poll::Ready(Err(e)),
Poll::Pending => Poll::Pending,
}
}
impl<R: AsyncRead + Unpin> AsyncRead for EtagReader<R> {
fn poll_read(mut self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll<tokio::io::Result<()>> {
match Pin::new(&mut self.inner).poll_read(cx, buf) {
Poll::Ready(Ok(())) => {
let bytes = buf.filled();
self.md5.update(bytes);
// This function will perform a flush using AsyncWrite
fn flush_async(&mut self, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
Pin::new(&mut self.inner).poll_flush(cx)
}
}
impl<R: AsyncRead + Unpin> Read for AsyncToSync<R> {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
let mut cx = std::task::Context::from_waker(futures::task::noop_waker_ref());
loop {
match self.read_async(&mut cx, buf) {
Poll::Ready(Ok(n)) => return Ok(n),
Poll::Ready(Err(e)) => return Err(e),
Poll::Pending => {
// If Pending, we need to wait for the readiness.
// Here, we can use an arbitrary mechanism to yield control,
// this might be blocking until some readiness occurs can be complex.
// A full blocking implementation would require an async runtime to block on.
std::thread::sleep(std::time::Duration::from_millis(1)); // Replace with proper waiting if needed
}
Poll::Ready(Ok(()))
}
other => other,
}
}
}
impl<W: AsyncWrite + Unpin> Write for AsyncToSync<W> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
let mut cx = std::task::Context::from_waker(futures::task::noop_waker_ref());
loop {
match self.write_async(&mut cx, buf) {
Poll::Ready(Ok(n)) => return Ok(n),
Poll::Ready(Err(e)) => return Err(e),
Poll::Pending => {
// Here we are blocking and waiting for the async operation to complete.
std::thread::sleep(std::time::Duration::from_millis(1)); // Not efficient, see notes.
}
}
}
}
fn flush(&mut self) -> std::io::Result<()> {
let mut cx = std::task::Context::from_waker(futures::task::noop_waker_ref());
loop {
match self.flush_async(&mut cx) {
Poll::Ready(Ok(())) => return Ok(()),
Poll::Ready(Err(e)) => return Err(e),
Poll::Pending => {
// Again, blocking to wait for flush.
std::thread::sleep(std::time::Duration::from_millis(1)); // Not efficient, see notes.
}
}
}
}
}
pub struct VecAsyncWriter {
buffer: Vec<u8>,
}
impl VecAsyncWriter {
/// Create a new VecAsyncWriter with an empty Vec<u8>.
pub fn new(buffer: Vec<u8>) -> Self {
VecAsyncWriter { buffer }
}
/// Retrieve the underlying buffer.
pub fn get_buffer(&self) -> &[u8] {
&self.buffer
}
}
// Implementing AsyncWrite trait for VecAsyncWriter
impl AsyncWrite for VecAsyncWriter {
fn poll_write(self: Pin<&mut Self>, _cx: &mut Context<'_>, buf: &[u8]) -> Poll<io::Result<usize>> {
let len = buf.len();
// Assume synchronous writing for simplicity
self.get_mut().buffer.extend_from_slice(buf);
// Returning the length of written data
Poll::Ready(Ok(len))
}
fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
// In this case, flushing is a no-op for a Vec<u8>
Poll::Ready(Ok(()))
}
fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<()>> {
// Similar to flush, shutdown has no effect here
Poll::Ready(Ok(()))
}
}
pub struct VecAsyncReader {
buffer: Vec<u8>,
position: usize,
}
impl VecAsyncReader {
/// Create a new VecAsyncReader with the given Vec<u8>.
pub fn new(buffer: Vec<u8>) -> Self {
VecAsyncReader { buffer, position: 0 }
}
/// Reset the reader position.
pub fn reset(&mut self) {
self.position = 0;
}
}
// Implementing AsyncRead trait for VecAsyncReader
impl AsyncRead for VecAsyncReader {
fn poll_read(self: Pin<&mut Self>, _cx: &mut Context<'_>, buf: &mut ReadBuf) -> Poll<io::Result<()>> {
let this = self.get_mut();
// Check how many bytes are available to read
let len = this.buffer.len();
let bytes_available = len - this.position;
if bytes_available == 0 {
// If there's no more data to read, return ready with an Eof
return Poll::Ready(Ok(()));
}
// Calculate how much we can read into the provided buffer
let to_read = std::cmp::min(bytes_available, buf.remaining());
// Write the data to the buf
buf.put_slice(&this.buffer[this.position..this.position + to_read]);
// Update the position
this.position += to_read;
// Indicate how many bytes were read
Poll::Ready(Ok(()))
}
}

View File

@@ -350,10 +350,9 @@ impl<R: AsyncRead + Unpin> MetacacheReader<R> {
#[tokio::test]
async fn test_writer() {
use crate::io::VecAsyncReader;
use crate::io::VecAsyncWriter;
use std::io::Cursor;
let mut f = VecAsyncWriter::new(Vec::new());
let mut f = Cursor::new(Vec::new());
let mut w = MetacacheWriter::new(&mut f);
@@ -373,16 +372,16 @@ async fn test_writer() {
w.close().await.unwrap();
let data = f.get_buffer().to_vec();
let data = f.into_inner();
let nf = VecAsyncReader::new(data);
let nf = Cursor::new(data);
let mut r = MetacacheReader::new(nf);
let nobjs = r.read_all().await.unwrap();
for info in nobjs.iter() {
println!("new {:?}", &info);
}
// for info in nobjs.iter() {
// println!("new {:?}", &info);
// }
assert_eq!(objs, nobjs)
}

View File

@@ -51,7 +51,7 @@ impl PeerRestClient {
let eps = eps.clone();
let hosts = eps.hosts_sorted();
let mut remote = vec![None; hosts.len()];
let mut remote = Vec::with_capacity(hosts.len());
let mut all = vec![None; hosts.len()];
for (i, hs_host) in hosts.iter().enumerate() {
if let Some(host) = hs_host {

View File

@@ -116,7 +116,7 @@ impl PoolMeta {
data.write_all(&buf)?;
for pool in pools {
save_config(pool, POOL_META_NAME, &data).await?;
save_config(pool, POOL_META_NAME, data.clone()).await?;
}
Ok(())

View File

@@ -1,6 +1,7 @@
use std::{
collections::{HashMap, HashSet},
io::{Cursor, Write},
mem::replace,
path::Path,
sync::Arc,
time::Duration,
@@ -14,10 +15,9 @@ use crate::{
endpoint::Endpoint,
error::{is_all_not_found, DiskError},
format::FormatV3,
new_disk, BufferReader, BufferWriter, CheckPartsResp, DeleteOptions, DiskAPI, DiskInfo, DiskInfoOptions, DiskOption,
DiskStore, FileInfoVersions, FileReader, FileWriter, MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams,
ReadMultipleReq, ReadMultipleResp, ReadOptions, UpdateMetadataOpts, RUSTFS_META_BUCKET, RUSTFS_META_MULTIPART_BUCKET,
RUSTFS_META_TMP_BUCKET,
new_disk, CheckPartsResp, DeleteOptions, DiskAPI, DiskInfo, DiskInfoOptions, DiskOption, DiskStore, FileInfoVersions,
MetaCacheEntries, MetaCacheEntry, MetadataResolutionParams, ReadMultipleReq, ReadMultipleResp, ReadOptions,
UpdateMetadataOpts, RUSTFS_META_BUCKET, RUSTFS_META_MULTIPART_BUCKET, RUSTFS_META_TMP_BUCKET,
},
erasure::Erasure,
error::{Error, Result},
@@ -35,6 +35,7 @@ use crate::{
},
heal_ops::BG_HEALING_UUID,
},
io::{EtagReader, READ_BUFFER_SIZE},
quorum::{object_op_ignored_errs, reduce_read_quorum_errs, reduce_write_quorum_errs, QuorumError},
store_api::{
BucketInfo, BucketOptions, CompletePart, DeleteBucketOptions, DeletedObject, FileInfo, GetObjectReader, HTTPRangeSpec,
@@ -67,6 +68,7 @@ use futures::future::join_all;
use glob::Pattern;
use http::HeaderMap;
use lock::{
// drwmutex::Options,
drwmutex::Options,
namespace_lock::{new_nslock, NsLockMap},
LockApi,
@@ -77,14 +79,12 @@ use rand::{
thread_rng,
{seq::SliceRandom, Rng},
};
use reader::reader::EtagReader;
use s3s::{dto::StreamingBlob, Body};
use sha2::{Digest, Sha256};
use std::hash::Hash;
use std::time::SystemTime;
use time::OffsetDateTime;
use tokio::{
io::DuplexStream,
io::{empty, AsyncWrite},
sync::{broadcast, RwLock},
};
use tokio::{
@@ -636,7 +636,7 @@ impl SetDisks {
}
fn get_upload_id_dir(bucket: &str, object: &str, upload_id: &str) -> String {
warn!("get_upload_id_dir upload_id {:?}", upload_id);
// warn!("get_upload_id_dir upload_id {:?}", upload_id);
let upload_uuid = base64_decode(upload_id.as_bytes())
.and_then(|v| {
@@ -1361,7 +1361,7 @@ impl SetDisks {
for (i, opdisk) in disks.iter().enumerate() {
if let Some(disk) = opdisk {
if disk.is_online().await && disk.get_disk_location().set_idx.is_some() {
info!("Disk {:?} is online", disk);
info!("Disk {:?} is online", disk.to_string());
continue;
}
@@ -1786,19 +1786,22 @@ impl SetDisks {
skip( writer,disks,fi,files),
fields(start_time=?time::OffsetDateTime::now_utc())
)]
async fn get_object_with_fileinfo(
async fn get_object_with_fileinfo<W>(
// &self,
bucket: &str,
object: &str,
offset: usize,
length: usize,
writer: &mut DuplexStream,
writer: &mut W,
fi: FileInfo,
files: Vec<FileInfo>,
disks: &[Option<DiskStore>],
set_index: usize,
pool_index: usize,
) -> Result<()> {
) -> Result<()>
where
W: AsyncWrite + Send + Sync + Unpin + 'static,
{
let (disks, files) = Self::shuffle_disks_and_parts_metadata_by_index(disks, &files, &fi);
let total_size = fi.size;
@@ -1855,20 +1858,12 @@ impl SetDisks {
// debug!("read part_path {}", &part_path);
if let Some(disk) = disk_op {
let filereader = {
if let Some(ref data) = files[idx].data {
FileReader::Buffer(BufferReader::new(data.clone()))
} else {
let disk = disk.clone();
let part_path =
format!("{}/{}/part.{}", object, files[idx].data_dir.unwrap_or(Uuid::nil()), part_number);
disk.read_file(bucket, &part_path).await?
}
};
let checksum_info = files[idx].erasure.get_checksum_info(part_number);
let reader = new_bitrot_filereader(
filereader,
disk.clone(),
files[idx].data.clone(),
bucket.to_owned(),
format!("{}/{}/part.{}", object, files[idx].data_dir.unwrap_or(Uuid::nil()), part_number),
till_offset,
checksum_info.algorithm,
erasure.shard_size(erasure.block_size),
@@ -2221,10 +2216,10 @@ impl SetDisks {
let mut outdate_disks = vec![None; disk_len];
let mut disks_to_heal_count = 0;
info!(
"errs: {:?}, data_errs_by_disk: {:?}, lastest_meta: {:?}",
errs, data_errs_by_disk, lastest_meta
);
// info!(
// "errs: {:?}, data_errs_by_disk: {:?}, lastest_meta: {:?}",
// errs, data_errs_by_disk, lastest_meta
// );
for index in 0..available_disks.len() {
let (yes, reason) = should_heal_object_on_disk(
&errs[index],
@@ -2411,18 +2406,21 @@ impl SetDisks {
let mut prefer = vec![false; latest_disks.len()];
for (index, disk) in latest_disks.iter().enumerate() {
if let (Some(disk), Some(metadata)) = (disk, &copy_parts_metadata[index]) {
let filereader = {
if let Some(ref data) = metadata.data {
FileReader::Buffer(BufferReader::new(data.clone()))
} else {
let disk = disk.clone();
let part_path = format!("{}/{}/part.{}", object, src_data_dir, part.number);
// let filereader = {
// if let Some(ref data) = metadata.data {
// Box::new(BufferReader::new(data.clone()))
// } else {
// let disk = disk.clone();
// let part_path = format!("{}/{}/part.{}", object, src_data_dir, part.number);
disk.read_file(bucket, &part_path).await?
}
};
// disk.read_file(bucket, &part_path).await?
// }
// };
let reader = new_bitrot_filereader(
filereader,
disk.clone(),
metadata.data.clone(),
bucket.to_owned(),
format!("{}/{}/part.{}", object, src_data_dir, part.number),
till_offset,
checksum_algo.clone(),
erasure.shard_size(erasure.block_size),
@@ -2444,21 +2442,25 @@ impl SetDisks {
for disk in out_dated_disks.iter() {
if let Some(disk) = disk {
let filewriter = {
if is_inline_buffer {
FileWriter::Buffer(BufferWriter::new(Vec::new()))
} else {
let disk = disk.clone();
let part_path = format!("{}/{}/part.{}", tmp_id, dst_data_dir, part.number);
disk.create_file("", RUSTFS_META_TMP_BUCKET, &part_path, 0).await?
}
};
// let filewriter = {
// if is_inline_buffer {
// Box::new(Cursor::new(Vec::new()))
// } else {
// let disk = disk.clone();
// let part_path = format!("{}/{}/part.{}", tmp_id, dst_data_dir, part.number);
// disk.create_file("", RUSTFS_META_TMP_BUCKET, &part_path, 0).await?
// }
// };
let writer = new_bitrot_filewriter(
filewriter,
disk.clone(),
RUSTFS_META_TMP_BUCKET,
format!("{}/{}/part.{}", tmp_id, dst_data_dir, part.number).as_str(),
is_inline_buffer,
DEFAULT_BITROT_ALGO,
erasure.shard_size(erasure.block_size),
);
)
.await?;
writers.push(Some(writer));
} else {
@@ -2494,9 +2496,7 @@ impl SetDisks {
if is_inline_buffer {
if let Some(ref writer) = writers[index] {
if let Some(w) = writer.as_any().downcast_ref::<BitrotFileWriter>() {
if let FileWriter::Buffer(buffer_writer) = w.writer() {
parts_metadata[index].data = Some(buffer_writer.as_ref().to_vec());
}
parts_metadata[index].data = Some(w.inline_data().to_vec());
}
}
parts_metadata[index].set_inline_data();
@@ -3607,7 +3607,7 @@ impl ObjectIO for SetDisks {
}
let reader = GetObjectReader {
stream: StreamingBlob::from(Body::from(Vec::new())),
stream: Box::new(Cursor::new(Vec::new())),
object_info,
};
return Ok(reader);
@@ -3615,10 +3615,9 @@ impl ObjectIO for SetDisks {
// TODO: remote
let (rd, mut wd) = tokio::io::duplex(fi.erasure.block_size);
let (rd, wd) = tokio::io::duplex(READ_BUFFER_SIZE);
let (reader, offset, length) =
GetObjectReader::new(StreamingBlob::wrap(tokio_util::io::ReaderStream::new(rd)), range, &object_info, opts, &h)?;
let (reader, offset, length) = GetObjectReader::new(Box::new(rd), range, &object_info, opts, &h)?;
// let disks = disks.clone();
let bucket = bucket.to_owned();
@@ -3627,12 +3626,23 @@ impl ObjectIO for SetDisks {
let pool_index = self.pool_index;
tokio::spawn(async move {
if let Err(e) = Self::get_object_with_fileinfo(
&bucket, &object, offset, length, &mut wd, fi, files, &disks, set_index, pool_index,
&bucket,
&object,
offset,
length,
&mut Box::new(wd),
fi,
files,
&disks,
set_index,
pool_index,
)
.await
{
error!("get_object_with_fileinfo err {:?}", e);
};
// error!("get_object_with_fileinfo end");
});
Ok(reader)
@@ -3736,17 +3746,25 @@ impl ObjectIO for SetDisks {
for disk_op in shuffle_disks.iter() {
if let Some(disk) = disk_op {
let filewriter = {
if is_inline_buffer {
FileWriter::Buffer(BufferWriter::new(Vec::new()))
} else {
let disk = disk.clone();
// let filewriter = {
// if is_inline_buffer {
// Box::new(Cursor::new(Vec::new()))
// } else {
// let disk = disk.clone();
disk.create_file("", RUSTFS_META_TMP_BUCKET, &tmp_object, 0).await?
}
};
// disk.create_file("", RUSTFS_META_TMP_BUCKET, &tmp_object, 0).await?
// }
// };
let writer = new_bitrot_filewriter(filewriter, DEFAULT_BITROT_ALGO, erasure.shard_size(erasure.block_size));
let writer = new_bitrot_filewriter(
disk.clone(),
RUSTFS_META_TMP_BUCKET,
&tmp_object,
is_inline_buffer,
DEFAULT_BITROT_ALGO,
erasure.shard_size(erasure.block_size),
)
.await?;
writers.push(Some(writer));
} else {
@@ -3754,13 +3772,19 @@ impl ObjectIO for SetDisks {
}
}
let stream = replace(&mut data.stream, Box::new(empty()));
let mut etag_stream = EtagReader::new(stream);
// TODO: etag from header
let mut etag_stream = EtagReader::new(&mut data.stream, None, None);
let w_size = erasure
.encode(&mut etag_stream, &mut writers, data.content_length, write_quorum)
.await?; // TODO: 出错,删除临时目录
if let Err(err) = close_bitrot_writers(&mut writers).await {
error!("close_bitrot_writers err {:?}", err);
}
let etag = etag_stream.etag();
//TODO: userDefined
@@ -3782,9 +3806,7 @@ impl ObjectIO for SetDisks {
if is_inline_buffer {
if let Some(ref writer) = writers[i] {
if let Some(w) = writer.as_any().downcast_ref::<BitrotFileWriter>() {
if let FileWriter::Buffer(buffer_writer) = w.writer() {
fi.data = Some(buffer_writer.as_ref().to_vec());
}
fi.data = Some(w.inline_data().to_vec());
}
}
}
@@ -4081,7 +4103,7 @@ impl StorageAPI for SetDisks {
for errs in results.into_iter().flatten() {
// TODO: handle err reduceWriteQuorumErrs
for err in errs.iter() {
for err in errs.iter().flatten() {
warn!("result err {:?}", err);
}
}
@@ -4288,6 +4310,7 @@ impl StorageAPI for SetDisks {
unimplemented!()
}
#[tracing::instrument(level = "debug", skip(self, data, opts))]
async fn put_object_part(
&self,
bucket: &str,
@@ -4318,10 +4341,18 @@ impl StorageAPI for SetDisks {
for disk in disks.iter() {
if let Some(disk) = disk {
// let writer = disk.append_file(RUSTFS_META_TMP_BUCKET, &tmp_part_path).await?;
let filewriter = disk
.create_file("", RUSTFS_META_TMP_BUCKET, &tmp_part_path, data.content_length)
.await?;
let writer = new_bitrot_filewriter(filewriter, DEFAULT_BITROT_ALGO, erasure.shard_size(erasure.block_size));
// let filewriter = disk
// .create_file("", RUSTFS_META_TMP_BUCKET, &tmp_part_path, data.content_length)
// .await?;
let writer = new_bitrot_filewriter(
disk.clone(),
RUSTFS_META_TMP_BUCKET,
&tmp_part_path,
false,
DEFAULT_BITROT_ALGO,
erasure.shard_size(erasure.block_size),
)
.await?;
writers.push(Some(writer));
} else {
writers.push(None);
@@ -4330,12 +4361,17 @@ impl StorageAPI for SetDisks {
let mut erasure = Erasure::new(fi.erasure.data_blocks, fi.erasure.parity_blocks, fi.erasure.block_size);
let mut etag_stream = EtagReader::new(&mut data.stream, None, None);
let stream = replace(&mut data.stream, Box::new(empty()));
let mut etag_stream = EtagReader::new(stream);
let w_size = erasure
.encode(&mut etag_stream, &mut writers, data.content_length, write_quorum)
.await?;
if let Err(err) = close_bitrot_writers(&mut writers).await {
error!("close_bitrot_writers err {:?}", err);
}
let mut etag = etag_stream.etag();
if let Some(ref tag) = opts.preserve_etag {
@@ -4811,25 +4847,28 @@ impl StorageAPI for SetDisks {
}
}
// TODO: 优化 cleanupMultipartPath
for p in curr_fi.parts.iter() {
self.remove_part_meta(
bucket,
object,
upload_id,
curr_fi.data_dir.unwrap_or(Uuid::nil()).to_string().as_str(),
p.number,
)
.await?;
if !fi.parts.iter().any(|v| v.number == p.number) {
self.remove_object_part(
let _ = self
.remove_part_meta(
bucket,
object,
upload_id,
curr_fi.data_dir.unwrap_or(Uuid::nil()).to_string().as_str(),
p.number,
)
.await?;
.await;
if !fi.parts.iter().any(|v| v.number == p.number) {
let _ = self
.remove_object_part(
bucket,
object,
upload_id,
curr_fi.data_dir.unwrap_or(Uuid::nil()).to_string().as_str(),
p.number,
)
.await;
}
}
@@ -5205,7 +5244,7 @@ async fn disks_with_all_parts(
}
}
}
info!("meta_errs: {:?}, errs: {:?}", meta_errs, errs);
// info!("meta_errs: {:?}, errs: {:?}", meta_errs, errs);
meta_errs.iter().enumerate().for_each(|(index, err)| {
if err.is_some() {
let part_err = conv_part_err_to_int(err);
@@ -5215,7 +5254,7 @@ async fn disks_with_all_parts(
}
});
info!("data_errs_by_part: {:?}, data_errs_by_disk: {:?}", data_errs_by_part, data_errs_by_disk);
// info!("data_errs_by_part: {:?}, data_errs_by_disk: {:?}", data_errs_by_part, data_errs_by_disk);
for (index, disk) in online_disks.iter().enumerate() {
if meta_errs[index].is_some() {
continue;
@@ -5239,13 +5278,15 @@ async fn disks_with_all_parts(
let checksum_info = meta.erasure.get_checksum_info(meta.parts[0].number);
let data_len = data.len();
let verify_err = match bitrot_verify(
&mut Cursor::new(data.to_vec()),
Box::new(Cursor::new(data.clone())),
data_len,
meta.erasure.shard_file_size(meta.size),
checksum_info.algorithm,
checksum_info.hash,
meta.erasure.shard_size(meta.erasure.block_size),
) {
)
.await
{
Ok(_) => None,
Err(err) => Some(err),
};
@@ -5300,7 +5341,7 @@ async fn disks_with_all_parts(
}
}
}
info!("data_errs_by_part: {:?}, data_errs_by_disk: {:?}", data_errs_by_part, data_errs_by_disk);
// info!("data_errs_by_part: {:?}, data_errs_by_disk: {:?}", data_errs_by_part, data_errs_by_disk);
for (part, disks) in data_errs_by_part.iter() {
for (idx, disk) in disks.iter().enumerate() {
if let Some(vec) = data_errs_by_disk.get_mut(&idx) {
@@ -5308,7 +5349,7 @@ async fn disks_with_all_parts(
}
}
}
info!("data_errs_by_part: {:?}, data_errs_by_disk: {:?}", data_errs_by_part, data_errs_by_disk);
// info!("data_errs_by_part: {:?}, data_errs_by_disk: {:?}", data_errs_by_part, data_errs_by_disk);
for (i, disk) in online_disks.iter().enumerate() {
if meta_errs[i].is_none() && disk.is_some() && !has_part_err(&data_errs_by_disk[&i]) {
available_disks[i] = Some(disk.clone().unwrap());

View File

@@ -138,7 +138,7 @@ impl Sets {
if let Some(_disk_id) = has_disk_id {
set_drive.push(disk);
} else {
warn!("sets new set_drive {}-{} get_disk_id is none", i, j);
error!("sets new set_drive {}-{} get_disk_id is none", i, j);
set_drive.push(None);
}
}
@@ -207,7 +207,7 @@ impl Sets {
},
_ = cloned_token.cancelled() => {
warn!("ctx cancelled");
warn!("monitor_and_connect_endpoints ctx cancelled");
break;
}
}

View File

@@ -1,4 +1,5 @@
use crate::heal::heal_ops::HealSequence;
use crate::io::FileReader;
use crate::store_utils::clean_metadata;
use crate::{
disk::DiskStore,
@@ -7,19 +8,20 @@ use crate::{
utils::path::decode_dir_object,
xhttp,
};
use futures::StreamExt;
use http::{HeaderMap, HeaderValue};
use madmin::heal_commands::HealResultItem;
use rmp_serde::Serializer;
use s3s::{dto::StreamingBlob, Body};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt::Debug;
use std::io::Cursor;
use std::sync::Arc;
use time::OffsetDateTime;
use tokio::io::AsyncReadExt;
use uuid::Uuid;
pub const ERASURE_ALGORITHM: &str = "rs-vandermonde";
pub const BLOCK_SIZE_V2: usize = 1048576; // 1M
pub const BLOCK_SIZE_V2: usize = 1024 * 1024; // 1M
pub const RESERVED_METADATA_PREFIX: &str = "X-Rustfs-Internal-";
pub const RESERVED_METADATA_PREFIX_LOWER: &str = "X-Rustfs-Internal-";
pub const RUSTFS_HEALING: &str = "X-Rustfs-Internal-healing";
@@ -416,35 +418,42 @@ pub struct DeleteBucketOptions {
pub srdelete_op: SRBucketDeleteOp,
}
#[derive(Debug)]
pub struct PutObjReader {
pub stream: StreamingBlob,
pub stream: FileReader,
pub content_length: usize,
}
impl Debug for PutObjReader {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("PutObjReader")
.field("content_length", &self.content_length)
.finish()
}
}
impl PutObjReader {
pub fn new(stream: StreamingBlob, content_length: usize) -> Self {
pub fn new(stream: FileReader, content_length: usize) -> Self {
PutObjReader { stream, content_length }
}
pub fn from_vec(data: Vec<u8>) -> Self {
let content_length = data.len();
PutObjReader {
stream: Body::from(data).into(),
stream: Box::new(Cursor::new(data)),
content_length,
}
}
}
pub struct GetObjectReader {
pub stream: StreamingBlob,
pub stream: FileReader,
pub object_info: ObjectInfo,
}
impl GetObjectReader {
#[tracing::instrument(level = "debug", skip(reader))]
pub fn new(
reader: StreamingBlob,
reader: FileReader,
rs: Option<HTTPRangeSpec>,
oi: &ObjectInfo,
opts: &ObjectOptions,
@@ -482,14 +491,15 @@ impl GetObjectReader {
}
pub async fn read_all(&mut self) -> Result<Vec<u8>> {
let mut data = Vec::new();
self.stream.read_to_end(&mut data).await?;
while let Some(x) = self.stream.next().await {
let buf = match x {
Ok(res) => res,
Err(e) => return Err(Error::msg(e.to_string())),
};
data.extend_from_slice(buf.as_ref());
}
// while let Some(x) = self.stream.next().await {
// let buf = match x {
// Ok(res) => res,
// Err(e) => return Err(Error::msg(e.to_string())),
// };
// data.extend_from_slice(buf.as_ref());
// }
Ok(data)
}

View File

@@ -53,6 +53,7 @@ pub async fn connect_load_init_formats(
set_drive_count: usize,
deployment_id: Option<Uuid>,
) -> Result<FormatV3, Error> {
warn!("connect_load_init_formats first_disk: {}", first_disk);
let (formats, errs) = load_format_erasure_all(disks, false).await;
debug!("load_format_erasure_all errs {:?}", &errs);
@@ -63,12 +64,13 @@ pub async fn connect_load_init_formats(
if first_disk && DiskError::should_init_erasure_disks(&errs) {
// UnformattedDisk, not format file create
warn!("first_disk && should_init_erasure_disks");
// new format and save
let fms = init_format_erasure(disks, set_count, set_drive_count, deployment_id);
let errs = save_format_file_all(disks, &fms).await;
debug!("save_format_file_all errs {:?}", &errs);
warn!("save_format_file_all errs {:?}", &errs);
// TODO: check quorum
// reduceWriteQuorumErrs(&errs)?;
@@ -77,6 +79,12 @@ pub async fn connect_load_init_formats(
return Ok(fm);
}
warn!(
"first_disk: {}, should_init_erasure_disks: {}",
first_disk,
DiskError::should_init_erasure_disks(&errs)
);
let unformatted = DiskError::quorum_unformatted_disks(&errs);
if unformatted && !first_disk {
return Err(Error::new(ErasureError::NotFirstDisk));

View File

@@ -99,7 +99,7 @@ fn get_fs_type(fs_type: FsType) -> &'static str {
match fs_type {
statfs::TMPFS_MAGIC => "TMPFS",
statfs::MSDOS_SUPER_MAGIC => "MSDOS",
statfs::XFS_SUPER_MAGIC => "XFS",
// statfs::XFS_SUPER_MAGIC => "XFS",
statfs::NFS_SUPER_MAGIC => "NFS",
statfs::EXT4_SUPER_MAGIC => "EXT4",
statfs::ECRYPTFS_SUPER_MAGIC => "ecryptfs",

View File

@@ -370,7 +370,7 @@ impl Store for ObjectStore {
let mut data = serde_json::to_vec(&item)?;
data = Self::encrypt_data(&data)?;
save_config(self.object_api.clone(), path.as_ref(), &data).await
save_config(self.object_api.clone(), path.as_ref(), data).await
}
async fn delete_iam_config(&self, path: impl AsRef<str> + Send) -> Result<()> {
delete_config(self.object_api.clone(), path.as_ref()).await

View File

@@ -1,24 +0,0 @@
[package]
name = "reader"
edition.workspace = true
license.workspace = true
repository.workspace = true
rust-version.workspace = true
version.workspace = true
[lints]
workspace = true
[dependencies]
tracing.workspace = true
s3s.workspace = true
thiserror.workspace = true
bytes.workspace = true
pin-project-lite.workspace = true
hex-simd = "0.8.0"
md-5.workspace = true
sha2 = { version = "0.11.0-pre.4" }
futures.workspace = true
[dev-dependencies]
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }

View File

@@ -1,12 +0,0 @@
/// Errors produced by the streaming hash/verification readers.
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum ReaderError {
    /// The underlying byte stream yielded an error; the upstream message is preserved verbatim.
    #[error("stream input error {0}")]
    StreamInput(String),
    /// The computed ETag did not match the expected one: (expected, computed).
    #[error("etag: expected ETag {0} does not match computed ETag {1}")]
    VerifyError(String, String),
    /// The computed MD5 checksum did not match the expected one: (expected, computed).
    #[error("Bad checksum: Want {0} does not match calculated {1}")]
    ChecksumMismatch(String, String),
    /// The computed SHA-256 digest did not match the expected one: (expected, computed).
    #[error("Bad sha256: Expected {0} does not match calculated {1}")]
    SHA256Mismatch(String, String),
}

View File

@@ -1,170 +0,0 @@
use md5::{Digest as Md5Digest, Md5};
use sha2::{
digest::{Reset, Update},
Digest, Sha256 as sha_sha256,
};
/// Minimal streaming-hash abstraction shared by the reader wrappers.
///
/// Implementations accumulate bytes via [`Hasher::write`] and render the
/// final digest as a string via [`Hasher::sum`].
pub trait Hasher {
    /// Feed `bytes` into the running digest.
    fn write(&mut self, bytes: &[u8]);
    /// Reset the digest back to its initial state.
    fn reset(&mut self);
    /// Finalize and return the digest as a string.
    /// Takes `&mut self` so implementations may clone or mutate internal state.
    fn sum(&mut self) -> String;
    /// Digest size. NOTE(review): implementations disagree on the unit —
    /// `Sha256` reports raw byte length (32) while `MD5` reports 32 (its hex
    /// length; raw MD5 is 16 bytes). Confirm the intended unit.
    fn size(&self) -> usize;
    /// Internal block size of the hash function, in bytes.
    fn block_size(&self) -> usize;
}
/// Tagged union over the supported hashers so callers can hold any of them
/// behind one concrete type (no trait objects needed).
#[derive(Default)]
pub enum HashType {
    /// No hashing configured; all `Hasher` operations are no-ops.
    #[default]
    Undefined,
    /// Fixed, precomputed value (e.g. a caller-supplied ETag).
    Uuid(Uuid),
    /// Running MD5 digest.
    Md5(MD5),
    /// Running SHA-256 digest.
    Sha256(Sha256),
}
impl Hasher for HashType {
fn write(&mut self, bytes: &[u8]) {
match self {
HashType::Md5(md5) => md5.write(bytes),
HashType::Sha256(sha256) => sha256.write(bytes),
HashType::Uuid(uuid) => uuid.write(bytes),
HashType::Undefined => (),
}
}
fn reset(&mut self) {
match self {
HashType::Md5(md5) => md5.reset(),
HashType::Sha256(sha256) => sha256.reset(),
HashType::Uuid(uuid) => uuid.reset(),
HashType::Undefined => (),
}
}
fn sum(&mut self) -> String {
match self {
HashType::Md5(md5) => md5.sum(),
HashType::Sha256(sha256) => sha256.sum(),
HashType::Uuid(uuid) => uuid.sum(),
HashType::Undefined => "".to_owned(),
}
}
fn size(&self) -> usize {
match self {
HashType::Md5(md5) => md5.size(),
HashType::Sha256(sha256) => sha256.size(),
HashType::Uuid(uuid) => uuid.size(),
HashType::Undefined => 0,
}
}
fn block_size(&self) -> usize {
match self {
HashType::Md5(md5) => md5.block_size(),
HashType::Sha256(sha256) => sha256.block_size(),
HashType::Uuid(uuid) => uuid.block_size(),
HashType::Undefined => 64,
}
}
}
/// Streaming SHA-256 hasher backed by the `sha2` crate.
pub struct Sha256 {
    state: sha_sha256,
}

impl Sha256 {
    /// Create a hasher with a fresh SHA-256 state.
    pub fn new() -> Self {
        let state = sha_sha256::new();
        Self { state }
    }
}

impl Default for Sha256 {
    fn default() -> Self {
        Sha256::new()
    }
}

impl Hasher for Sha256 {
    fn write(&mut self, bytes: &[u8]) {
        Update::update(&mut self.state, bytes);
    }

    fn reset(&mut self) {
        Reset::reset(&mut self.state);
    }

    /// Lowercase-hex digest of everything written so far.
    /// The internal state is cloned so hashing can continue afterwards.
    fn sum(&mut self) -> String {
        let digest = self.state.clone().finalize();
        hex_simd::encode_to_string(digest, hex_simd::AsciiCase::Lower)
    }

    /// Raw SHA-256 digest length, in bytes.
    fn size(&self) -> usize {
        32
    }

    /// SHA-256 internal block size, in bytes.
    fn block_size(&self) -> usize {
        64
    }
}
/// Streaming MD5 hasher backed by the `md-5` crate.
pub struct MD5 {
    hasher: Md5,
}

impl MD5 {
    /// Create a hasher with a fresh MD5 state.
    pub fn new() -> Self {
        Self { hasher: Md5::new() }
    }
}

impl Default for MD5 {
    fn default() -> Self {
        Self::new()
    }
}

impl Hasher for MD5 {
    fn write(&mut self, bytes: &[u8]) {
        self.hasher.update(bytes);
    }

    /// Reset to a fresh state.
    ///
    /// Fix: this used to be an empty no-op, so a reused `MD5` silently kept
    /// folding previously-written input into later digests — inconsistent
    /// with `Sha256::reset`, which really resets.
    fn reset(&mut self) {
        self.hasher = Md5::new();
    }

    /// Lowercase-hex digest of everything written so far.
    /// The internal state is cloned so hashing can continue afterwards.
    fn sum(&mut self) -> String {
        hex_simd::encode_to_string(self.hasher.clone().finalize(), hex_simd::AsciiCase::Lower)
    }

    /// Raw MD5 digest length, in bytes.
    ///
    /// Fix: previously returned 32 (the hex-string length); an MD5 digest is
    /// 16 bytes, matching the raw-byte convention used by `Sha256::size`.
    fn size(&self) -> usize {
        16
    }

    /// MD5 internal block size, in bytes.
    fn block_size(&self) -> usize {
        64
    }
}
/// Pseudo-hasher that always reports a fixed, caller-supplied value.
/// Used to force a predetermined ETag instead of computing one.
pub struct Uuid {
    id: String,
}

impl Uuid {
    /// Wrap the fixed value that [`Hasher::sum`] will return.
    pub fn new(id: String) -> Self {
        Uuid { id }
    }
}

impl Hasher for Uuid {
    /// Input is ignored — the reported value is fixed.
    fn write(&mut self, _bytes: &[u8]) {}

    /// Nothing to reset — the reported value is fixed.
    fn reset(&mut self) {}

    /// Return the fixed value.
    fn sum(&mut self) -> String {
        self.id.to_owned()
    }

    /// Length of the fixed value, in bytes.
    fn size(&self) -> usize {
        self.id.len()
    }

    /// Conventional 64-byte block size; no real hashing happens here.
    fn block_size(&self) -> usize {
        64
    }
}

View File

@@ -1,7 +0,0 @@
pub mod error;
pub mod hasher;
pub mod reader;
/// Encode arbitrary bytes as a lowercase hexadecimal string.
pub fn hex(data: impl AsRef<[u8]>) -> String {
    hex_simd::encode_to_string(data, hex_simd::AsciiCase::Lower)
}

View File

@@ -1,493 +0,0 @@
use bytes::Bytes;
use s3s::StdError;
use std::collections::VecDeque;
use std::pin::Pin;
use std::task::Poll;
use crate::{
error::ReaderError,
hasher::{HashType, Uuid},
};
// use futures::stream::Stream;
use super::hasher::{Hasher, Sha256, MD5};
use futures::Stream;
pin_project_lite::pin_project! {
    /// Stream adapter that transparently hashes every chunk passing through
    /// and can verify the final digest against an expected ETag.
    #[derive(Default)]
    pub struct EtagReader<S> {
        // Underlying chunk stream; pinned for polling.
        #[pin]
        inner: S,
        // Running digest (MD5, or a fixed value when forced).
        md5: HashType,
        // Expected ETag to verify at end-of-stream, if any.
        checksum:Option<String>,
        // Total number of payload bytes seen so far.
        bytes_read:usize,
    }
}
impl<S> EtagReader<S> {
pub fn new(inner: S, etag: Option<String>, force_md5: Option<String>) -> Self {
let md5 = {
if let Some(m) = force_md5 {
HashType::Uuid(Uuid::new(m))
} else {
HashType::Md5(MD5::new())
}
};
Self {
inner,
md5,
checksum: etag,
bytes_read: 0,
}
}
pub fn etag(&mut self) -> String {
self.md5.sum()
}
}
impl<S> Stream for EtagReader<S>
where
    S: Stream<Item = Result<Bytes, StdError>>,
{
    type Item = Result<Bytes, StdError>;

    /// Pass chunks through unchanged while folding them into the digest;
    /// at end-of-stream, verify against the expected ETag if one was given.
    fn poll_next(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<Option<Self::Item>> {
        let this = self.project();
        let poll = this.inner.poll_next(cx);
        if let Poll::Ready(ref res) = poll {
            match res {
                Some(Ok(bytes)) => {
                    // Count and hash the chunk before forwarding it downstream.
                    *this.bytes_read += bytes.len();
                    this.md5.write(bytes);
                }
                Some(Err(err)) => {
                    // Wrap upstream failures in our own error type.
                    return Poll::Ready(Some(Err(Box::new(ReaderError::StreamInput(err.to_string())))));
                }
                None => {
                    // End of stream: verify the computed digest when an ETag was expected.
                    if let Some(etag) = this.checksum {
                        let got = this.md5.sum();
                        if got.as_str() != etag.as_str() {
                            return Poll::Ready(Some(Err(Box::new(ReaderError::VerifyError(etag.to_owned(), got)))));
                        }
                    }
                }
            }
        }
        poll
    }
}
pin_project_lite::pin_project! {
    /// Stream adapter that computes MD5 and/or SHA-256 over the payload and
    /// verifies them against expected lowercase-hex digests at end-of-stream.
    #[derive(Default)]
    pub struct HashReader<S> {
        // Underlying chunk stream; pinned for polling.
        #[pin]
        inner: S,
        // Running SHA-256 state; present only when `sha256_hex` was given.
        sha256: Option<Sha256>,
        // Running MD5 state; present only when `md5_hex` was given.
        md5: Option<MD5>,
        // Expected MD5 digest (lowercase hex), if any.
        md5_hex:Option<String>,
        // Expected SHA-256 digest (lowercase hex), if any.
        sha256_hex:Option<String>,
        // Declared payload size — NOTE(review): stored but never checked here.
        size:usize,
        // Actual (pre-encoding) size — NOTE(review): stored but unused here.
        actual_size: usize,
        // Total number of payload bytes seen so far.
        bytes_read:usize,
    }
}
impl<S> HashReader<S> {
pub fn new(inner: S, size: usize, md5_hex: Option<String>, sha256_hex: Option<String>, actual_size: usize) -> Self {
let md5 = {
if md5_hex.is_some() {
Some(MD5::new())
} else {
None
}
};
let sha256 = {
if sha256_hex.is_some() {
Some(Sha256::new())
} else {
None
}
};
Self {
inner,
size,
actual_size,
md5_hex,
sha256_hex,
bytes_read: 0,
md5,
sha256,
}
}
}
impl<S> Stream for HashReader<S>
where
    S: Stream<Item = Result<Bytes, StdError>>,
{
    type Item = Result<Bytes, StdError>;

    /// Pass chunks through unchanged while feeding the enabled digests;
    /// at end-of-stream, verify each digest against its expected hex value.
    fn poll_next(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<Option<Self::Item>> {
        let this = self.project();
        let poll = this.inner.poll_next(cx);
        if let Poll::Ready(ref res) = poll {
            match res {
                Some(Ok(bytes)) => {
                    // Account for the chunk and fold it into the active digests.
                    *this.bytes_read += bytes.len();
                    if let Some(sha) = this.sha256 {
                        sha.write(bytes);
                    }
                    if let Some(md5) = this.md5 {
                        md5.write(bytes);
                    }
                }
                Some(Err(err)) => {
                    // Wrap upstream failures in our own error type.
                    return Poll::Ready(Some(Err(Box::new(ReaderError::StreamInput(err.to_string())))));
                }
                None => {
                    // End of stream: check each enabled digest against its expectation.
                    if let Some(hash) = this.sha256 {
                        if let Some(hex) = this.sha256_hex {
                            let got = hash.sum();
                            let src = hex.as_str();
                            if src != got.as_str() {
                                println!("sha256 err src:{},got:{}", src, got);
                                return Poll::Ready(Some(Err(Box::new(ReaderError::SHA256Mismatch(src.to_string(), got)))));
                            }
                        }
                    }
                    if let Some(hash) = this.md5 {
                        if let Some(hex) = this.md5_hex {
                            let got = hash.sum();
                            let src = hex.as_str();
                            if src != got.as_str() {
                                // TODO: replace the println with proper error logging.
                                println!("md5 err src:{},got:{}", src, got);
                                return Poll::Ready(Some(Err(Box::new(ReaderError::ChecksumMismatch(src.to_string(), got)))));
                            }
                        }
                    }
                }
            }
        }
        poll
    }
}
pin_project_lite::pin_project! {
    /// Stream adapter that re-slices an arbitrary chunk stream into fixed-size
    /// chunks; only the final chunk may be shorter than `chuck_size`.
    pub struct ChunkedStream<S> {
        // Underlying chunk stream; pinned for polling.
        #[pin]
        inner: S,
        // Target chunk size in bytes. NOTE(review): "chuck" is a typo of
        // "chunk"; kept as-is because the impl blocks reference this name.
        chuck_size: usize,
        // Fully-sized chunks buffered and ready to be yielded.
        streams: VecDeque<Bytes>,
        // Partial tail carried over until enough bytes arrive.
        remaining:Vec<u8>,
    }
}
impl<S> ChunkedStream<S> {
pub fn new(inner: S, chuck_size: usize) -> Self {
Self {
inner,
chuck_size,
streams: VecDeque::new(),
remaining: Vec::new(),
}
}
}
impl<S> Stream for ChunkedStream<S>
where
    S: Stream<Item = Result<Bytes, StdError>> + Send + Sync,
{
    type Item = Result<Bytes, StdError>;

    /// Re-slice the inner stream into `chuck_size`-byte chunks.
    ///
    /// Ready full-size chunks are buffered in `streams`; a short tail is kept
    /// in `remaining` until more bytes arrive or the inner stream ends.
    fn poll_next(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<Option<Self::Item>> {
        let (items, op_items) = self.inner.size_hint();
        let this = self.project();
        // Drain any chunk prepared on a previous poll before touching `inner`.
        if let Some(b) = this.streams.pop_front() {
            return Poll::Ready(Some(Ok(b)));
        }
        let poll = this.inner.poll_next(cx);
        match poll {
            Poll::Ready(res_op) => match res_op {
                Some(res) => match res {
                    Ok(bytes) => {
                        let chuck_size = *this.chuck_size;
                        let mut bytes = bytes;
                        // If a partial chunk was carried over, try to complete it first.
                        if !this.remaining.is_empty() {
                            let need_size = chuck_size - this.remaining.len();
                            // Enough new bytes to fill the partial chunk: top it up and queue it.
                            if bytes.len() >= need_size {
                                let add_bytes = bytes.split_to(need_size);
                                this.remaining.extend_from_slice(&add_bytes);
                                this.streams.push_back(Bytes::from(this.remaining.clone()));
                                this.remaining.clear();
                            } else {
                                // Still not enough for a full chunk: append everything and wait.
                                let need_size = bytes.len();
                                let add_bytes = bytes.split_to(need_size);
                                this.remaining.extend_from_slice(&add_bytes);
                            }
                        }
                        // Slice off as many full-size chunks as the new data allows.
                        loop {
                            if bytes.len() < chuck_size {
                                break;
                            }
                            let chuck = bytes.split_to(chuck_size);
                            this.streams.push_back(chuck);
                        }
                        // Whatever is left becomes the new partial tail.
                        if !bytes.is_empty() {
                            this.remaining.extend_from_slice(&bytes);
                        }
                        if let Some(b) = this.streams.pop_front() {
                            return Poll::Ready(Some(Ok(b)));
                        }
                        // NOTE(review): returns Pending based on a size_hint taken
                        // before the poll, without registering a fresh wakeup — confirm
                        // the inner stream re-wakes this task, otherwise this can stall.
                        if items > 0 || op_items.is_some() {
                            return Poll::Pending;
                        }
                        // No buffered chunks and nothing more expected: flush the tail.
                        if !this.remaining.is_empty() {
                            let b = this.remaining.clone();
                            this.remaining.clear();
                            return Poll::Ready(Some(Ok(Bytes::from(b))));
                        }
                        Poll::Ready(None)
                    }
                    Err(err) => Poll::Ready(Some(Err(err))),
                },
                None => {
                    // Inner stream finished: drain buffered chunks, then the tail.
                    if let Some(b) = this.streams.pop_front() {
                        return Poll::Ready(Some(Ok(b)));
                    }
                    if !this.remaining.is_empty() {
                        let b = this.remaining.clone();
                        this.remaining.clear();
                        return Poll::Ready(Some(Ok(Bytes::from(b))));
                    }
                    Poll::Ready(None)
                }
            },
            Poll::Pending => {
                Poll::Pending
            }
        }
    }

    /// Lower/upper bound = chunks already buffered (+1 for a pending tail).
    fn size_hint(&self) -> (usize, Option<usize>) {
        let mut items = self.streams.len();
        if !self.remaining.is_empty() {
            items += 1;
        }
        (items, Some(items))
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use futures::StreamExt;

    /// Smoke test: stream two chunks through `EtagReader` and print the ETag.
    /// NOTE(review): prints instead of asserting — the digest is not verified.
    #[tokio::test]
    async fn test_etag_reader() {
        let data1 = vec![1u8; 60];
        let data2 = vec![0u8; 32];
        let chunk1 = Bytes::from(data1);
        let chunk2 = Bytes::from(data2);
        let chunk_results: Vec<Result<Bytes, StdError>> = vec![Ok(chunk1), Ok(chunk2)];
        let mut stream = futures::stream::iter(chunk_results);
        let mut hash_reader = EtagReader::new(&mut stream, None, None);
        // Drain the stream; hashing happens as a side effect of polling.
        loop {
            match hash_reader.next().await {
                Some(res) => match res {
                    Ok(bytes) => {
                        println!("bytes: {}, {:?}", bytes.len(), bytes);
                    }
                    Err(err) => {
                        println!("err:{:?}", err);
                        break;
                    }
                },
                None => {
                    println!("next none");
                    break;
                }
            }
        }
        println!("etag:{}", hash_reader.etag());
    }

    /// `HashReader` verifies both digests at end-of-stream; mismatches
    /// surface as stream errors, printed by the loop below.
    #[tokio::test]
    async fn test_hash_reader() {
        let data1 = vec![1u8; 60];
        let data2 = vec![0u8; 32];
        let size = data1.len() + data2.len();
        let chunk1 = Bytes::from(data1);
        let chunk2 = Bytes::from(data2);
        let chunk_results: Vec<Result<Bytes, StdError>> = vec![Ok(chunk1), Ok(chunk2)];
        let mut stream = futures::stream::iter(chunk_results);
        // Expected MD5 and SHA-256 of the payload above.
        let mut hash_reader = HashReader::new(
            &mut stream,
            size,
            Some("d94c485610a7a00a574df55e45d3cc0c".to_string()),
            Some("9a7dfa2fcd7b69c89a30cfd3a9be11ab58cb6172628bd7e967fad1e187456d45".to_string()),
            0,
        );
        loop {
            match hash_reader.next().await {
                Some(res) => match res {
                    Ok(bytes) => {
                        println!("bytes: {}, {:?}", bytes.len(), bytes);
                    }
                    Err(err) => {
                        println!("err:{:?}", err);
                        break;
                    }
                },
                None => {
                    println!("next none");
                    break;
                }
            }
        }
    }

    /// Re-chunk three unevenly sized buffers into 8-byte chunks while also
    /// computing an ETag underneath the chunker.
    #[tokio::test]
    async fn test_chunked_stream() {
        let data1 = vec![1u8; 60];
        let data2 = vec![0u8; 33];
        let data3 = vec![4u8; 5];
        let chunk1 = Bytes::from(data1);
        let chunk2 = Bytes::from(data2);
        let chunk3 = Bytes::from(data3);
        let chunk_results: Vec<Result<Bytes, StdError>> = vec![Ok(chunk1), Ok(chunk2), Ok(chunk3)];
        let mut stream = futures::stream::iter(chunk_results);
        let chunk_size = 8;
        let mut etag_reader = EtagReader::new(&mut stream, None, None);
        let mut chunked_stream = ChunkedStream::new(&mut etag_reader, chunk_size);
        loop {
            match chunked_stream.next().await {
                Some(res) => match res {
                    Ok(bytes) => {
                        println!("bytes: {}, {:?}", bytes.len(), bytes);
                    }
                    Err(err) => {
                        println!("err:{:?}", err);
                        break;
                    }
                },
                None => {
                    println!("next none");
                    break;
                }
            }
        }
        println!("etag:{}", etag_reader.etag());
    }
}

View File

@@ -1,5 +0,0 @@
# Flow
## Write path
http::Body -> HashReader -> ...(other reader) -> ChunkedReader -> BitrotWriter -> FileWriter

View File

@@ -61,7 +61,6 @@ tracing-subscriber.workspace = true
transform-stream.workspace = true
uuid = "1.15.1"
url.workspace = true
admin = { path = "../api/admin" }
axum.workspace = true
matchit = "0.8.6"
shadow-rs.workspace = true

View File

@@ -1,5 +1,6 @@
pub mod handlers;
pub mod router;
mod rpc;
pub mod utils;
use common::error::Result;
@@ -11,6 +12,7 @@ use handlers::{
};
use hyper::Method;
use router::{AdminOperation, S3Router};
use rpc::regist_rpc_route;
use s3s::route::S3Route;
const ADMIN_PREFIX: &str = "/rustfs/admin";
@@ -21,6 +23,7 @@ pub fn make_admin_route() -> Result<impl S3Route> {
// 1
r.insert(Method::POST, "/", AdminOperation(&sts::AssumeRoleHandle {}))?;
regist_rpc_route(&mut r)?;
regist_user_route(&mut r)?;
r.insert(

View File

@@ -14,6 +14,7 @@ use s3s::S3Request;
use s3s::S3Response;
use s3s::S3Result;
use super::rpc::RPC_PREFIX;
use super::ADMIN_PREFIX;
pub struct S3Router<T> {
@@ -63,7 +64,7 @@ where
}
}
uri.path().starts_with(ADMIN_PREFIX)
uri.path().starts_with(ADMIN_PREFIX) || uri.path().starts_with(RPC_PREFIX)
}
async fn call(&self, req: S3Request<Body>) -> S3Result<S3Response<(StatusCode, Body)>> {
@@ -81,6 +82,10 @@ where
// check_access before call
async fn check_access(&self, req: &mut S3Request<Body>) -> S3Result<()> {
// TODO: check access by req.credentials
if req.uri.path().starts_with(RPC_PREFIX) {
return Ok(());
}
match req.credentials {
Some(_) => Ok(()),
None => Err(s3_error!(AccessDenied, "Signature is required")),

132
rustfs/src/admin/rpc.rs Normal file
View File

@@ -0,0 +1,132 @@
use super::router::AdminOperation;
use super::router::Operation;
use super::router::S3Router;
use crate::storage::ecfs::bytes_stream;
use common::error::Result;
use ecstore::disk::DiskAPI;
use ecstore::io::READ_BUFFER_SIZE;
use ecstore::store::find_local_disk;
use futures::TryStreamExt;
use http::StatusCode;
use hyper::Method;
use matchit::Params;
use s3s::dto::StreamingBlob;
use s3s::s3_error;
use s3s::Body;
use s3s::S3Request;
use s3s::S3Response;
use s3s::S3Result;
use serde_urlencoded::from_bytes;
use tokio_util::io::ReaderStream;
use tokio_util::io::StreamReader;
pub const RPC_PREFIX: &str = "/rustfs/rpc";
/// Register the internal RPC endpoints (streaming file read/write) on the router.
pub fn regist_rpc_route(r: &mut S3Router<AdminOperation>) -> Result<()> {
    // GET /rustfs/rpc/read_file_stream — stream a byte range of a file from a local disk.
    r.insert(
        Method::GET,
        format!("{RPC_PREFIX}/read_file_stream").as_str(),
        AdminOperation(&ReadFile {}),
    )?;
    // PUT /rustfs/rpc/put_file_stream — write the request body to a file on a local disk.
    r.insert(
        Method::PUT,
        format!("{RPC_PREFIX}/put_file_stream").as_str(),
        AdminOperation(&PutFile {}),
    )?;
    Ok(())
}
// Query parameters for the read endpoint:
// /rustfs/rpc/read_file_stream?disk={}&volume={}&path={}&offset={}&length={}"
#[derive(Debug, Default, serde::Deserialize)]
pub struct ReadFileQuery {
    disk: String,   // identifier of the local disk to read from
    volume: String, // volume (bucket directory) on that disk
    path: String,   // file path within the volume
    offset: usize,  // byte offset to start reading at
    length: usize,  // number of bytes to read
}
/// RPC handler that streams a byte range of a file from a local disk
/// (GET /rustfs/rpc/read_file_stream).
pub struct ReadFile {}

#[async_trait::async_trait]
impl Operation for ReadFile {
    async fn call(&self, req: S3Request<Body>, _params: Params<'_, '_>) -> S3Result<S3Response<(StatusCode, Body)>> {
        // Parse the query string; an absent query falls back to defaults.
        let query = {
            if let Some(query) = req.uri.query() {
                let input: ReadFileQuery =
                    from_bytes(query.as_bytes()).map_err(|e| s3_error!(InvalidArgument, "get query failed {:?}", e))?;
                input
            } else {
                ReadFileQuery::default()
            }
        };

        // The requested disk must be attached to this node.
        let Some(disk) = find_local_disk(&query.disk).await else {
            return Err(s3_error!(InvalidArgument, "disk not found"));
        };

        let file = disk
            .read_file_stream(&query.volume, &query.path, query.offset, query.length)
            .await
            .map_err(|e| s3_error!(InternalError, "read file err {}", e))?;

        // Stream the file back in READ_BUFFER_SIZE chunks, capped at `length` bytes.
        Ok(S3Response::new((
            StatusCode::OK,
            Body::from(StreamingBlob::wrap(bytes_stream(
                ReaderStream::with_capacity(file, READ_BUFFER_SIZE),
                query.length,
            ))),
        )))
    }
}
// Query parameters for the write endpoint:
// /rustfs/rpc/put_file_stream?disk={}&volume={}&path={}&append={}&size={}"
#[derive(Debug, Default, serde::Deserialize)]
pub struct PutFileQuery {
    disk: String,   // identifier of the local disk to write to
    volume: String, // volume (bucket directory) on that disk
    path: String,   // file path within the volume
    append: bool,   // true: append to an existing file; false: create/truncate
    size: usize,    // expected total size when creating a new file
}
/// RPC handler that writes the request body to a file on a local disk
/// (PUT /rustfs/rpc/put_file_stream), either appending or creating it
/// depending on the `append` query parameter.
pub struct PutFile {}

#[async_trait::async_trait]
impl Operation for PutFile {
    async fn call(&self, req: S3Request<Body>, _params: Params<'_, '_>) -> S3Result<S3Response<(StatusCode, Body)>> {
        // Parse the query string; an absent query falls back to defaults.
        let query = {
            if let Some(query) = req.uri.query() {
                let input: PutFileQuery =
                    from_bytes(query.as_bytes()).map_err(|e| s3_error!(InvalidArgument, "get query failed {:?}", e))?;
                input
            } else {
                PutFileQuery::default()
            }
        };

        // The requested disk must be attached to this node.
        let Some(disk) = find_local_disk(&query.disk).await else {
            return Err(s3_error!(InvalidArgument, "disk not found"));
        };

        let mut file = if query.append {
            disk.append_file(&query.volume, &query.path)
                .await
                .map_err(|e| s3_error!(InternalError, "append file err {}", e))?
        } else {
            disk.create_file("", &query.volume, &query.path, query.size)
                .await
                .map_err(|e| s3_error!(InternalError, "create file err {}", e))?
        };

        // Adapt the S3 body stream into an AsyncRead and copy it to the disk file.
        let mut body = StreamReader::new(
            req.input
                .into_stream()
                .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)),
        );

        tokio::io::copy(&mut body, &mut file)
            .await
            .map_err(|e| s3_error!(InternalError, "copy err {}", e))?;

        Ok(S3Response::new((StatusCode::OK, Body::empty())))
    }
}

View File

@@ -1,18 +1,11 @@
use std::{
collections::HashMap,
error::Error,
io::{Cursor, ErrorKind},
pin::Pin,
};
use std::{collections::HashMap, io::Cursor, pin::Pin};
use ecstore::{
admin_server_info::get_local_server_property,
bucket::{metadata::load_bucket_metadata, metadata_sys},
disk::{
DeleteOptions, DiskAPI, DiskInfoOptions, DiskStore, FileInfoVersions, ReadMultipleReq, ReadOptions, Reader,
UpdateMetadataOpts,
DeleteOptions, DiskAPI, DiskInfoOptions, DiskStore, FileInfoVersions, ReadMultipleReq, ReadOptions, UpdateMetadataOpts,
},
erasure::Writer,
error::Error as EcsError,
heal::{
data_usage_cache::DataUsageCache,
@@ -51,25 +44,25 @@ use tracing::{debug, error, info};
type ResponseStream<T> = Pin<Box<dyn Stream<Item = Result<T, tonic::Status>> + Send>>;
fn match_for_io_error(err_status: &Status) -> Option<&std::io::Error> {
let mut err: &(dyn Error + 'static) = err_status;
// fn match_for_io_error(err_status: &Status) -> Option<&std::io::Error> {
// let mut err: &(dyn Error + 'static) = err_status;
loop {
if let Some(io_err) = err.downcast_ref::<std::io::Error>() {
return Some(io_err);
}
// loop {
// if let Some(io_err) = err.downcast_ref::<std::io::Error>() {
// return Some(io_err);
// }
// h2::Error do not expose std::io::Error with `source()`
// https://github.com/hyperium/h2/pull/462
if let Some(h2_err) = err.downcast_ref::<h2::Error>() {
if let Some(io_err) = h2_err.get_io() {
return Some(io_err);
}
}
// // h2::Error do not expose std::io::Error with `source()`
// // https://github.com/hyperium/h2/pull/462
// if let Some(h2_err) = err.downcast_ref::<h2::Error>() {
// if let Some(io_err) = h2_err.get_io() {
// return Some(io_err);
// }
// }
err = err.source()?;
}
}
// err = err.source()?;
// }
// }
#[derive(Debug)]
pub struct NodeService {
@@ -559,238 +552,245 @@ impl Node for NodeService {
}
}
async fn write(&self, request: Request<WriteRequest>) -> Result<Response<WriteResponse>, Status> {
let request = request.into_inner();
if let Some(disk) = self.find_disk(&request.disk).await {
let file_writer = if request.is_append {
disk.append_file(&request.volume, &request.path).await
} else {
disk.create_file("", &request.volume, &request.path, 0).await
};
async fn write(&self, _request: Request<WriteRequest>) -> Result<Response<WriteResponse>, Status> {
unimplemented!("write");
// let request = request.into_inner();
// if let Some(disk) = self.find_disk(&request.disk).await {
// let file_writer = if request.is_append {
// disk.append_file(&request.volume, &request.path).await
// } else {
// disk.create_file("", &request.volume, &request.path, 0).await
// };
match file_writer {
Ok(mut file_writer) => match file_writer.write(&request.data).await {
Ok(_) => Ok(tonic::Response::new(WriteResponse {
success: true,
error: None,
})),
Err(err) => Ok(tonic::Response::new(WriteResponse {
success: false,
error: Some(err_to_proto_err(&err, &format!("write failed: {}", err))),
})),
},
Err(err) => Ok(tonic::Response::new(WriteResponse {
success: false,
error: Some(err_to_proto_err(&err, &format!("get writer failed: {}", err))),
})),
}
} else {
Ok(tonic::Response::new(WriteResponse {
success: false,
error: Some(err_to_proto_err(
&EcsError::new(StorageError::InvalidArgument(Default::default(), Default::default(), Default::default())),
"can not find disk",
)),
}))
}
// match file_writer {
// Ok(mut file_writer) => match file_writer.write(&request.data).await {
// Ok(_) => Ok(tonic::Response::new(WriteResponse {
// success: true,
// error: None,
// })),
// Err(err) => Ok(tonic::Response::new(WriteResponse {
// success: false,
// error: Some(err_to_proto_err(&err, &format!("write failed: {}", err))),
// })),
// },
// Err(err) => Ok(tonic::Response::new(WriteResponse {
// success: false,
// error: Some(err_to_proto_err(&err, &format!("get writer failed: {}", err))),
// })),
// }
// } else {
// Ok(tonic::Response::new(WriteResponse {
// success: false,
// error: Some(err_to_proto_err(
// &EcsError::new(StorageError::InvalidArgument(Default::default(), Default::default(), Default::default())),
// "can not find disk",
// )),
// }))
// }
}
type WriteStreamStream = ResponseStream<WriteResponse>;
async fn write_stream(&self, request: Request<Streaming<WriteRequest>>) -> Result<Response<Self::WriteStreamStream>, Status> {
async fn write_stream(
&self,
_request: Request<Streaming<WriteRequest>>,
) -> Result<Response<Self::WriteStreamStream>, Status> {
info!("write_stream");
let mut in_stream = request.into_inner();
let (tx, rx) = mpsc::channel(128);
unimplemented!("write_stream");
tokio::spawn(async move {
let mut file_ref = None;
while let Some(result) = in_stream.next().await {
match result {
// Ok(v) => tx
// .send(Ok(EchoResponse { message: v.message }))
// .await
// .expect("working rx"),
Ok(v) => {
match file_ref.as_ref() {
Some(_) => (),
None => {
if let Some(disk) = find_local_disk(&v.disk).await {
let file_writer = if v.is_append {
disk.append_file(&v.volume, &v.path).await
} else {
disk.create_file("", &v.volume, &v.path, 0).await
};
// let mut in_stream = request.into_inner();
// let (tx, rx) = mpsc::channel(128);
match file_writer {
Ok(file_writer) => file_ref = Some(file_writer),
Err(err) => {
tx.send(Ok(WriteResponse {
success: false,
error: Some(err_to_proto_err(
&err,
&format!("get get file writer failed: {}", err),
)),
}))
.await
.expect("working rx");
break;
}
}
} else {
tx.send(Ok(WriteResponse {
success: false,
error: Some(err_to_proto_err(
&EcsError::new(StorageError::InvalidArgument(
Default::default(),
Default::default(),
Default::default(),
)),
"can not find disk",
)),
}))
.await
.expect("working rx");
break;
}
}
};
// tokio::spawn(async move {
// let mut file_ref = None;
// while let Some(result) = in_stream.next().await {
// match result {
// // Ok(v) => tx
// // .send(Ok(EchoResponse { message: v.message }))
// // .await
// // .expect("working rx"),
// Ok(v) => {
// match file_ref.as_ref() {
// Some(_) => (),
// None => {
// if let Some(disk) = find_local_disk(&v.disk).await {
// let file_writer = if v.is_append {
// disk.append_file(&v.volume, &v.path).await
// } else {
// disk.create_file("", &v.volume, &v.path, 0).await
// };
match file_ref.as_mut().unwrap().write(&v.data).await {
Ok(_) => tx.send(Ok(WriteResponse {
success: true,
error: None,
})),
Err(err) => tx.send(Ok(WriteResponse {
success: false,
error: Some(err_to_proto_err(&err, &format!("write failed: {}", err))),
})),
}
.await
.unwrap();
}
Err(err) => {
if let Some(io_err) = match_for_io_error(&err) {
if io_err.kind() == ErrorKind::BrokenPipe {
// here you can handle special case when client
// disconnected in unexpected way
eprintln!("\tclient disconnected: broken pipe");
break;
}
}
// match file_writer {
// Ok(file_writer) => file_ref = Some(file_writer),
// Err(err) => {
// tx.send(Ok(WriteResponse {
// success: false,
// error: Some(err_to_proto_err(
// &err,
// &format!("get get file writer failed: {}", err),
// )),
// }))
// .await
// .expect("working rx");
// break;
// }
// }
// } else {
// tx.send(Ok(WriteResponse {
// success: false,
// error: Some(err_to_proto_err(
// &EcsError::new(StorageError::InvalidArgument(
// Default::default(),
// Default::default(),
// Default::default(),
// )),
// "can not find disk",
// )),
// }))
// .await
// .expect("working rx");
// break;
// }
// }
// };
match tx.send(Err(err)).await {
Ok(_) => (),
Err(_err) => break, // response was dropped
}
}
}
}
println!("\tstream ended");
});
// match file_ref.as_mut().unwrap().write(&v.data).await {
// Ok(_) => tx.send(Ok(WriteResponse {
// success: true,
// error: None,
// })),
// Err(err) => tx.send(Ok(WriteResponse {
// success: false,
// error: Some(err_to_proto_err(&err, &format!("write failed: {}", err))),
// })),
// }
// .await
// .unwrap();
// }
// Err(err) => {
// if let Some(io_err) = match_for_io_error(&err) {
// if io_err.kind() == ErrorKind::BrokenPipe {
// // here you can handle special case when client
// // disconnected in unexpected way
// eprintln!("\tclient disconnected: broken pipe");
// break;
// }
// }
let out_stream = ReceiverStream::new(rx);
// match tx.send(Err(err)).await {
// Ok(_) => (),
// Err(_err) => break, // response was dropped
// }
// }
// }
// }
// println!("\tstream ended");
// });
Ok(tonic::Response::new(Box::pin(out_stream)))
// let out_stream = ReceiverStream::new(rx);
// Ok(tonic::Response::new(Box::pin(out_stream)))
}
type ReadAtStream = ResponseStream<ReadAtResponse>;
async fn read_at(&self, request: Request<Streaming<ReadAtRequest>>) -> Result<Response<Self::ReadAtStream>, Status> {
async fn read_at(&self, _request: Request<Streaming<ReadAtRequest>>) -> Result<Response<Self::ReadAtStream>, Status> {
info!("read_at");
unimplemented!("read_at");
let mut in_stream = request.into_inner();
let (tx, rx) = mpsc::channel(128);
// let mut in_stream = request.into_inner();
// let (tx, rx) = mpsc::channel(128);
tokio::spawn(async move {
let mut file_ref = None;
while let Some(result) = in_stream.next().await {
match result {
Ok(v) => {
match file_ref.as_ref() {
Some(_) => (),
None => {
if let Some(disk) = find_local_disk(&v.disk).await {
match disk.read_file(&v.volume, &v.path).await {
Ok(file_reader) => file_ref = Some(file_reader),
Err(err) => {
tx.send(Ok(ReadAtResponse {
success: false,
data: Vec::new(),
error: Some(err_to_proto_err(&err, &format!("read file failed: {}", err))),
read_size: -1,
}))
.await
.expect("working rx");
break;
}
}
} else {
tx.send(Ok(ReadAtResponse {
success: false,
data: Vec::new(),
error: Some(err_to_proto_err(
&EcsError::new(StorageError::InvalidArgument(
Default::default(),
Default::default(),
Default::default(),
)),
"can not find disk",
)),
read_size: -1,
}))
.await
.expect("working rx");
break;
}
}
};
// tokio::spawn(async move {
// let mut file_ref = None;
// while let Some(result) = in_stream.next().await {
// match result {
// Ok(v) => {
// match file_ref.as_ref() {
// Some(_) => (),
// None => {
// if let Some(disk) = find_local_disk(&v.disk).await {
// match disk.read_file(&v.volume, &v.path).await {
// Ok(file_reader) => file_ref = Some(file_reader),
// Err(err) => {
// tx.send(Ok(ReadAtResponse {
// success: false,
// data: Vec::new(),
// error: Some(err_to_proto_err(&err, &format!("read file failed: {}", err))),
// read_size: -1,
// }))
// .await
// .expect("working rx");
// break;
// }
// }
// } else {
// tx.send(Ok(ReadAtResponse {
// success: false,
// data: Vec::new(),
// error: Some(err_to_proto_err(
// &EcsError::new(StorageError::InvalidArgument(
// Default::default(),
// Default::default(),
// Default::default(),
// )),
// "can not find disk",
// )),
// read_size: -1,
// }))
// .await
// .expect("working rx");
// break;
// }
// }
// };
let mut data = vec![0u8; v.length.try_into().unwrap()];
// let mut data = vec![0u8; v.length.try_into().unwrap()];
match file_ref
.as_mut()
.unwrap()
.read_at(v.offset.try_into().unwrap(), &mut data)
.await
{
Ok(read_size) => tx.send(Ok(ReadAtResponse {
success: true,
data,
read_size: read_size.try_into().unwrap(),
error: None,
})),
Err(err) => tx.send(Ok(ReadAtResponse {
success: false,
data: Vec::new(),
error: Some(err_to_proto_err(&err, &format!("read at failed: {}", err))),
read_size: -1,
})),
}
.await
.unwrap();
}
Err(err) => {
if let Some(io_err) = match_for_io_error(&err) {
if io_err.kind() == ErrorKind::BrokenPipe {
// here you can handle special case when client
// disconnected in unexpected way
eprintln!("\tclient disconnected: broken pipe");
break;
}
}
// match file_ref
// .as_mut()
// .unwrap()
// .read_at(v.offset.try_into().unwrap(), &mut data)
// .await
// {
// Ok(read_size) => tx.send(Ok(ReadAtResponse {
// success: true,
// data,
// read_size: read_size.try_into().unwrap(),
// error: None,
// })),
// Err(err) => tx.send(Ok(ReadAtResponse {
// success: false,
// data: Vec::new(),
// error: Some(err_to_proto_err(&err, &format!("read at failed: {}", err))),
// read_size: -1,
// })),
// }
// .await
// .unwrap();
// }
// Err(err) => {
// if let Some(io_err) = match_for_io_error(&err) {
// if io_err.kind() == ErrorKind::BrokenPipe {
// // here you can handle special case when client
// // disconnected in unexpected way
// eprintln!("\tclient disconnected: broken pipe");
// break;
// }
// }
match tx.send(Err(err)).await {
Ok(_) => (),
Err(_err) => break, // response was dropped
}
}
}
}
println!("\tstream ended");
});
// match tx.send(Err(err)).await {
// Ok(_) => (),
// Err(_err) => break, // response was dropped
// }
// }
// }
// }
// println!("\tstream ended");
// });
let out_stream = ReceiverStream::new(rx);
// let out_stream = ReceiverStream::new(rx);
Ok(tonic::Response::new(Box::pin(out_stream)))
// Ok(tonic::Response::new(Box::pin(out_stream)))
}
async fn list_dir(&self, request: Request<ListDirRequest>) -> Result<Response<ListDirResponse>, Status> {

View File

@@ -161,6 +161,10 @@ async fn run(opt: config::Opt) -> Result<()> {
"created endpoints {}, set_count:{}, drives_per_set: {}, cmd: {:?}",
i, eps.set_count, eps.drives_per_set, eps.cmd_line
);
for ep in eps.endpoints.as_ref().iter() {
info!(" - {}", ep);
}
}
set_global_addr(&opt.address).await;

View File

@@ -20,6 +20,7 @@ use ecstore::bucket::policy_sys::PolicySys;
use ecstore::bucket::tagging::decode_tags;
use ecstore::bucket::tagging::encode_tags;
use ecstore::bucket::versioning_sys::BucketVersioningSys;
use ecstore::io::READ_BUFFER_SIZE;
use ecstore::new_object_layer_fn;
use ecstore::store_api::BucketOptions;
use ecstore::store_api::CompletePart;
@@ -51,6 +52,8 @@ use s3s::S3;
use s3s::{S3Request, S3Response};
use std::fmt::Debug;
use std::str::FromStr;
use tokio_util::io::ReaderStream;
use tokio_util::io::StreamReader;
use tracing::debug;
use tracing::error;
use tracing::info;
@@ -464,8 +467,13 @@ impl S3 for FS {
};
let last_modified = info.mod_time.map(Timestamp::from);
let body = Some(StreamingBlob::wrap(bytes_stream(
ReaderStream::with_capacity(reader.stream, READ_BUFFER_SIZE),
info.size,
)));
let output = GetObjectOutput {
body: Some(reader.stream),
body,
content_length: Some(info.size as i64),
last_modified,
content_type,
@@ -799,6 +807,10 @@ impl S3 for FS {
}
};
let body = Box::new(StreamReader::new(
body.map(|f| f.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))),
));
let mut reader = PutObjReader::new(body, content_length as usize);
let Some(store) = new_object_layer_fn() else {
@@ -911,6 +923,10 @@ impl S3 for FS {
}
};
let body = Box::new(StreamReader::new(
body.map(|f| f.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))),
));
// mc cp step 4
let mut data = PutObjReader::new(body, content_length as usize);
let opts = ObjectOptions::default();

View File

@@ -6,8 +6,8 @@ fi
current_dir=$(pwd)
mkdir -p ./target/volume/test
# mkdir -p ./target/volume/test{0..4}
# mkdir -p ./target/volume/test
mkdir -p ./target/volume/test{0..4}
if [ -z "$RUST_LOG" ]; then
@@ -19,8 +19,8 @@ fi
# export RUSTFS_STORAGE_CLASS_INLINE_BLOCK="512 KB"
# RUSTFS_VOLUMES="./target/volume/test{0...4}"
export RUSTFS_VOLUMES="./target/volume/test"
export RUSTFS_VOLUMES="./target/volume/test{0...4}"
# export RUSTFS_VOLUMES="./target/volume/test"
export RUSTFS_ADDRESS="0.0.0.0:9000"
export RUSTFS_CONSOLE_ENABLE=true
export RUSTFS_CONSOLE_ADDRESS="0.0.0.0:9002"