diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e11da3cf..886a5bc0 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -3,11 +3,11 @@ name: Build
 on:
   workflow_dispatch:
   schedule:
-    - cron: '0 0 * * 0' # at midnight of each sunday
+    - cron: "0 0 * * 0" # at midnight every Sunday
   push:
     branches:
       - main
-    tags: [ 'v*', '*' ]
+    tags: ["v*", "*"]
 
 jobs:
   build-rustfs:
@@ -16,9 +16,17 @@ jobs:
     strategy:
       matrix:
         variant:
-          - { profile: dev, target: x86_64-unknown-linux-gnu, glibc: "default" }
-          - { profile: release, target: x86_64-unknown-linux-gnu, glibc: "default" }
-          - { profile: release, target: x86_64-unknown-linux-gnu, glibc: "2.31" }
+          - { profile: dev, target: x86_64-unknown-linux-gnu, glibc: "default" }
+          - {
+              profile: release,
+              target: x86_64-unknown-linux-gnu,
+              glibc: "default",
+            }
+          - {
+              profile: release,
+              target: x86_64-unknown-linux-gnu,
+              glibc: "2.31",
+            }
 
     steps:
       - uses: actions/checkout@v4
@@ -51,13 +59,13 @@ jobs:
             ARTIFACT_NAME="${ARTIFACT_NAME}-glibc${{ matrix.variant.glibc }}"
           fi
           echo "artifact_name=${ARTIFACT_NAME}" >> $GITHUB_OUTPUT
-          
+
           # Determine binary path
           bin_path="target/artifacts/rustfs.${{ matrix.variant.profile }}.${{ matrix.variant.target }}.bin"
           if [ -f "target/artifacts/rustfs.${{ matrix.variant.profile }}.${{ matrix.variant.target }}.glibc${{ matrix.variant.glibc }}.bin" ]; then
             bin_path="target/artifacts/rustfs.${{ matrix.variant.profile }}.${{ matrix.variant.target }}.glibc${{ matrix.variant.glibc }}.bin"
           fi
-          
+
           # Create package
           mkdir -p ${ARTIFACT_NAME}
           cp "$bin_path" ${ARTIFACT_NAME}/rustfs
@@ -69,6 +77,16 @@ jobs:
           name: ${{ steps.package.outputs.artifact_name }}
           path: ${{ steps.package.outputs.artifact_name }}.zip
           retention-days: 7
+      - name: Upload to Aliyun OSS
+        uses: JohnGuan/oss-upload-action@main
+        with:
+          key-id: ${{ secrets.ALICLOUDOSS_KEY_ID }}
+          key-secret: ${{ secrets.ALICLOUDOSS_KEY_SECRET }}
+          region: oss-cn-beijing
+          bucket: rustfs-artifacts
+          assets: |
+            ${{ steps.package.outputs.artifact_name }}.zip:/artifacts/rustfs/${{ steps.package.outputs.artifact_name }}.zip
+            ${{ steps.package.outputs.artifact_name }}.zip:/artifacts/rustfs/${{ steps.package.outputs.artifact_name }}.latest.zip
 
   build-rustfs-gui:
     runs-on: ubuntu-latest
@@ -88,7 +106,7 @@ jobs:
           name: "rustfs-${{ matrix.variant.profile }}-${{ matrix.variant.target }}"
       - name: Display structure of downloaded files
         run: |
-          ls -R 
+          ls -R
           unzip -o -j "rustfs-${{ matrix.variant.profile }}-${{ matrix.variant.target }}.zip" -d ./cli/rustfs-gui/embedded-rustfs/
           ls -la cli/rustfs-gui/embedded-rustfs
       - name: Cache dioxus-cli
@@ -108,12 +126,12 @@ jobs:
       - name: Build and Bundle rustfs-gui
         run: |
           ls -la
-          
+
           release_path="target/${{ matrix.variant.target }}"
           mkdir -p ${release_path}
           cd cli/rustfs-gui
           ls -la embedded-rustfs
-          
+
           # Configure the linker based on the target
           case "${{ matrix.target }}" in
             "x86_64-unknown-linux-gnu")
@@ -140,7 +158,7 @@ jobs:
           # Validating Environment Variables (for Debugging)
           echo "CC for ${{ matrix.target }}: $CC_${{ matrix.target }}"
           echo "Linker for ${{ matrix.target }}: $CARGO_TARGET_${{ matrix.target }}_LINKER"
-          
+
           if [[ "${{ matrix.variant.target }}" == *"apple-darwin"* ]]; then
             dx bundle --platform macos --package-types "macos" --package-types "dmg" --package-types "ios" --release --profile release --out-dir ../../${release_path}
           elif [[ "${{ matrix.variant.target }}" == *"windows-msvc"* ]]; then
@@ -159,13 +177,23 @@ jobs:
       - name: Upload artifact
         uses: actions/upload-artifact@v4
         with:
           name: ${{ steps.package.outputs.gui_artifact_name }}
           path: ${{ steps.package.outputs.gui_artifact_name }}.zip
           retention-days: 7
+      - name: Upload to Aliyun OSS
+        uses: JohnGuan/oss-upload-action@main
+        with:
+          key-id: ${{ secrets.ALICLOUDOSS_KEY_ID }}
+          key-secret: ${{ secrets.ALICLOUDOSS_KEY_SECRET }}
+          region: oss-cn-beijing
+          bucket: rustfs-artifacts
+          assets: |
+            ${{ steps.package.outputs.gui_artifact_name }}.zip:/artifacts/rustfs/${{ steps.package.outputs.gui_artifact_name }}.zip
+            ${{ steps.package.outputs.gui_artifact_name }}.zip:/artifacts/rustfs/${{ steps.package.outputs.gui_artifact_name }}.latest.zip
 
   merge:
     runs-on: ubuntu-latest
-    needs: [ build-rustfs, build-rustfs-gui ]
+    needs: [build-rustfs, build-rustfs-gui]
     steps:
       - uses: actions/upload-artifact/merge@v4
         with:
           name: rustfs-packages
-          pattern: 'rustfs-*'
+          pattern: "rustfs-*"
           delete-merged: true
diff --git a/Cargo.lock b/Cargo.lock index 18226104..1e189aa2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -63,6 +63,20 @@ dependencies = [ "version_check", ] +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.2.15", + "once_cell", + "version_check", + "zerocopy 0.7.35", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -72,6 +86,27 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -143,6 +178,28 @@ version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" +[[package]] +name = "api" +version = "0.0.1" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "datafusion", + "ecstore", + "futures", + "futures-core", + "http", + "object_store", + "s3s", + "snafu", + "tokio", + "tokio-util", + "tracing", + "transform-stream", + "url", +] + [[package]] name = "arc-swap" version = "1.7.1" @@ -167,12 +224,229 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + [[package]] name = "arrayvec" version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "arrow" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5ec52ba94edeed950e4a41f75d35376df196e8cb04437f7280a5aa49f20f796" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", +
"arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc766fdacaf804cb10c7c70580254fcdb5d55cdfda2bc57b02baf5223a3af9e" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "num", +] + +[[package]] +name = "arrow-array" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a12fcdb3f1d03f69d3ec26ac67645a8fe3f878d77b5ebb0b15d64a116c212985" +dependencies = [ + "ahash 0.8.11", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.2", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "263f4801ff1839ef53ebd06f99a56cecd1dbaf314ec893d93168e2e860e0291c" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede6175fbc039dfc946a61c1b6d42fd682fcecf5ab5d148fbe7667705798cac9" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64 0.22.1", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1644877d8bc9a0ef022d9153dc29375c2bda244c39aec05a91d0e87ccf77995f" +dependencies = [ + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "regex", +] + +[[package]] +name = "arrow-data" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61cfdd7d99b4ff618f167e548b2411e5dd2c98c0ddebedd7df433d34c20a4429" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62ff528658b521e33905334723b795ee56b393dbe9cf76c8b1f64b648c65a60c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", +] + +[[package]] +name = "arrow-json" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee5b4ca98a7fb2efb9ab3309a5d1c88b5116997ff93f3147efdc1062a6158e9" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap 2.8.0", + "lexical-core", + "memchr", + "num", + "serde", + "serde_json", + "simdutf8", +] + +[[package]] +name = "arrow-ord" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0a3334a743bd2a1479dbc635540617a3923b4b2f6870f37357339e6b5363c21" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", +] + +[[package]] +name = "arrow-row" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d1d7a7291d2c5107e92140f75257a99343956871f3d3ab33a7b41532f79cb68" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"39cfaf5e440be44db5413b75b72c2a87c1f8f0627117d110264048f2969b99e9" + +[[package]] +name = "arrow-select" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69efcd706420e52cd44f5c4358d279801993846d1c2a8e52111853d61d55a619" +dependencies = [ + "ahash 0.8.11", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21546b337ab304a32cfc0770f671db7411787586b45b78b4593ae78e64e2b03" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax 0.8.5", +] + [[package]] name = "ashpd" version = "0.8.1" @@ -232,6 +506,23 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "async-compression" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + [[package]] name = "async-io" version = "2.4.0" @@ -450,9 +741,9 @@ dependencies = [ [[package]] name = "backon" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49fef586913a57ff189f25c9b3d034356a5bf6b3fa9a7f067588fe1698ba1f5d" +checksum = "970d91570c01a8a5959b36ad7dd1c30642df24b6b3068710066f6809f7033bb7" dependencies = [ "fastrand", "gloo-timers", @@ -502,6 +793,19 @@ version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89e25b6adfb930f02d1981565a6e5d9c547ac15a96606256d3b59040e5cd4ca3" +[[package]] +name = "bigdecimal" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -526,6 +830,19 @@ dependencies = [ "digest 0.10.7", ] +[[package]] +name = "blake3" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34a796731680be7931955498a16a10b2270c7762963d5d570fdbfe02dcbf314f" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + [[package]] name = "block" version = "0.1.6" @@ -581,6 +898,27 @@ dependencies = [ "piper", ] +[[package]] +name = "brotli" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "4.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74fa05ad7d803d413eb8380983b092cbbaf9a85f151b871360e7b00cd7060b37" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bumpalo" version = "3.17.0" @@ -601,9 +939,9 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "bytesize" -version = "1.3.2" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d2c12f985c78475a6b8d629afd0c360260ef34cfef52efccdcfd31972f81c2e" +checksum = 
"2e93abca9e28e0a1b9877922aacb20576e05d4679ffa78c3d6dc22a26a216659" [[package]] name = "bytestring" @@ -614,6 +952,25 @@ dependencies = [ "bytes", ] +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "cairo-rs" version = "0.18.5" @@ -641,10 +998,12 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.16" +version = "1.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" +checksum = "1fcb57c740ae1daf453ae85f16e37396f672b039e00d9d866e07ddb24e328e3a" dependencies = [ + "jobserver", + "libc", "shlex", ] @@ -726,6 +1085,27 @@ dependencies = [ "windows-link", ] +[[package]] +name = "chrono-tz" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efdce149c370f133a071ca8ef6ea340b7b88748ab0810097a9e2976eaa34b4f3" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf 0.11.3", +] + +[[package]] +name = "chrono-tz-build" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f10f8c9340e31fc120ff885fcdb54a0b48e474bbd77cab557f0c30a3e569402" +dependencies = [ + "parse-zoneinfo", + "phf_codegen 0.11.3", +] + [[package]] name = "ciborium" version = "0.2.2" @@ -766,9 +1146,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.32" +version = "4.5.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6088f3ae8c3608d19260cd7445411865a485688711b78b5be70d78cd96136f83" +checksum = "e958897981290da2a852763fe9cdb89cd36977a5d729023127095fa94d95e2ff" dependencies = [ "clap_builder", "clap_derive", @@ -776,9 +1156,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.32" +version = "4.5.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22a7ef7f676155edfb82daa97f99441f3ebf4a58d5e32f295a56259f1b6facc8" +checksum = "83b0f35019843db2160b5bb19ae09b4e6411ac33fc6a712003c33e03090e2489" dependencies = [ "anstream", "anstyle", @@ -880,6 +1260,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "comfy-table" +version = "7.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" +dependencies = [ + "unicode-segmentation", + "unicode-width", +] + [[package]] name = "common" version = "0.0.1" @@ -1017,6 +1407,12 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + [[package]] name = "convert_case" version = "0.4.0" @@ -1219,6 +1615,27 @@ dependencies = [ "syn 2.0.100", ] +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = [ + "csv-core", + "itoa 1.0.15", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.12" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +dependencies = [ + "memchr", +] + [[package]] name = "ctr" version = "0.9.2" @@ -1230,9 +1647,9 @@ dependencies = [ [[package]] name = "darling" -version = "0.20.10" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ "darling_core", "darling_macro", @@ -1240,22 +1657,23 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.10" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", + "strsim", "syn 2.0.100", ] [[package]] name = "darling_macro" -version = "0.20.10" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core", "quote", @@ -1275,12 +1693,505 @@ dependencies = [ "parking_lot_core 0.9.10", ] +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core 0.9.10", +] + [[package]] name = "data-encoding" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "575f75dfd25738df5b91b8e43e14d44bda14637a58fae779fd2b064f8bf3e010" +[[package]] +name = "datafusion" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914e6f9525599579abbd90b0f7a55afcaaaa40350b9e9ed52563f126dfe45fd3" +dependencies = [ + "arrow", + "arrow-ipc", + "arrow-schema", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-catalog", + "datafusion-catalog-listing", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-table", + "datafusion-functions-window", + "datafusion-macros", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot 0.12.3", + "parquet", + "rand 0.8.5", + "regex", + "sqlparser", + "tempfile", + "tokio", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "998a6549e6ee4ee3980e05590b2960446a56b343ea30199ef38acd0e0b9036e2" +dependencies = [ + "arrow", + "async-trait", + "dashmap 6.1.0", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "parking_lot 0.12.3", +] + +[[package]] 
+name = "datafusion-catalog-listing" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ac10096a5b3c0d8a227176c0e543606860842e943594ccddb45cf42a526e43" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "futures", + "log", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-common" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f53d7ec508e1b3f68bd301cee3f649834fad51eff9240d898a4b2614cfd0a7a" +dependencies = [ + "ahash 0.8.11", + "arrow", + "arrow-ipc", + "base64 0.22.1", + "half", + "hashbrown 0.14.5", + "indexmap 2.8.0", + "libc", + "log", + "object_store", + "parquet", + "paste", + "recursive", + "sqlparser", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common-runtime" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0fcf41523b22e14cc349b01526e8b9f59206653037f2949a4adbfde5f8cb668" +dependencies = [ + "log", + "tokio", +] + +[[package]] +name = "datafusion-datasource" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf7f37ad8b6e88b46c7eeab3236147d32ea64b823544f498455a8d9042839c92" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "rand 0.8.5", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-doc" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7db7a0239fd060f359dc56c6e7db726abaa92babaed2fb2e91c3a8b2fff8b256" + +[[package]] +name = "datafusion-execution" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0938f9e5b6bc5782be4111cdfb70c02b7b5451bf34fd57e4de062a7f7c4e31f1" +dependencies = [ + "arrow", + "dashmap 6.1.0", + "datafusion-common", + "datafusion-expr", + "futures", + "log", + "object_store", + "parking_lot 0.12.3", + "rand 0.8.5", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b36c28b00b00019a8695ad7f1a53ee1673487b90322ecbd604e2cf32894eb14f" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap 2.8.0", + "paste", + "recursive", + "serde_json", + "sqlparser", +] + +[[package]] +name = "datafusion-expr-common" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18f0a851a436c5a2139189eb4617a54e6a9ccb9edc96c4b3c83b3bb7c58b950e" +dependencies = [ + "arrow", + "datafusion-common", + "indexmap 2.8.0", + "itertools 0.14.0", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e3196e37d7b65469fb79fee4f05e5bb58a456831035f9a38aa5919aeb3298d40" +dependencies = [ + "arrow", + "arrow-buffer", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", + "hex", + "itertools 0.14.0", + "log", + "md-5", + "rand 0.8.5", + "regex", + "sha2 0.10.8", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adfc2d074d5ee4d9354fdcc9283d5b2b9037849237ddecb8942a29144b77ca05" +dependencies = [ + "ahash 0.8.11", + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cbceba0f98d921309a9121b702bcd49289d383684cccabf9a92cda1602f3bbb" +dependencies = [ + "ahash 0.8.11", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-functions-nested" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "170e27ce4baa27113ddf5f77f1a7ec484b0dbeda0c7abbd4bad3fc609c8ab71a" +dependencies = [ + "arrow", + "arrow-ord", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-macros", + "datafusion-physical-expr-common", + "itertools 0.14.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-table" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d3a06a7f0817ded87b026a437e7e51de7f59d48173b0a4e803aa896a7bd6bb5" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot 0.12.3", + "paste", +] + +[[package]] +name = "datafusion-functions-window" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6c608b66496a1e05e3d196131eb9bebea579eed1f59e88d962baf3dda853bc6" +dependencies = [ + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da2f9d83348957b4ad0cd87b5cb9445f2651863a36592fe5484d43b49a5f8d82" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-macros" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4800e1ff7ecf8f310887e9b54c9c444b8e215ccbc7b21c2f244cfae373b1ece7" +dependencies = [ + "datafusion-expr", + "quote", + "syn 2.0.100", +] + +[[package]] +name = "datafusion-optimizer" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "971c51c54cd309001376fae752fb15a6b41750b6d1552345c46afbfb6458801b" +dependencies = [ + "arrow", + "chrono", + 
"datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "indexmap 2.8.0", + "itertools 0.14.0", + "log", + "recursive", + "regex", + "regex-syntax 0.8.5", +] + +[[package]] +name = "datafusion-physical-expr" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1447c2c6bc8674a16be4786b4abf528c302803fafa186aa6275692570e64d85" +dependencies = [ + "ahash 0.8.11", + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap 2.8.0", + "itertools 0.14.0", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f8c25dcd069073a75b3d2840a79d0f81e64bdd2c05f2d3d18939afb36a7dcb" +dependencies = [ + "ahash 0.8.11", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "itertools 0.14.0", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68da5266b5b9847c11d1b3404ee96b1d423814e1973e1ad3789131e5ec912763" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools 0.14.0", + "log", + "recursive", +] + +[[package]] +name = "datafusion-physical-plan" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88cc160df00e413e370b3b259c8ea7bfbebc134d32de16325950e9e923846b7f" +dependencies = [ + "ahash 0.8.11", + "arrow", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap 2.8.0", + "itertools 0.14.0", + "log", + "parking_lot 0.12.3", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "325a212b67b677c0eb91447bf9a11b630f9fc4f62d8e5d145bf859f5a6b29e64" +dependencies = [ + "arrow", + "bigdecimal", + "datafusion-common", + "datafusion-expr", + "indexmap 2.8.0", + "log", + "recursive", + "regex", + "sqlparser", +] + [[package]] name = "dbus" version = "0.9.7" @@ -1307,14 +2218,45 @@ dependencies = [ [[package]] name = "deranged" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" +checksum = "28cfac68e08048ae1883171632c2aef3ebc555621ae56fbccce1cbf22dd7f058" dependencies = [ "powerfmt", "serde", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + 
"syn 2.0.100", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn 2.0.100", +] + [[package]] name = "derive_more" version = "0.99.19" @@ -2107,9 +3049,9 @@ dependencies = [ [[package]] name = "event-listener-strategy" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c3e4e0dd3673c1139bf041f3008816d9cf2946bbfac2945c09e523b8d7b05b2" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" dependencies = [ "event-listener", "pin-project-lite", @@ -2485,9 +3427,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" dependencies = [ "cfg-if", + "js-sys", "libc", "r-efi", "wasi 0.14.2+wasi-0.2.4", + "wasm-bindgen", ] [[package]] @@ -2744,6 +3688,7 @@ checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1" dependencies = [ "cfg-if", "crunchy", + "num-traits", ] [[package]] @@ -2752,7 +3697,7 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ - "ahash", + "ahash 0.7.8", ] [[package]] @@ -2760,6 +3705,10 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.11", + "allocator-api2", +] [[package]] name = "hashbrown" @@ -3010,7 +3959,7 @@ dependencies = [ "ecstore", "futures", "ipnetwork", - "itertools", + "itertools 0.14.0", "jsonwebtoken", "lazy_static", "log", @@ -3030,14 +3979,15 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "b2fd658b06e56721792c5df4475705b6cda790e9298d19d2f8af083457bcd127" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", "windows-core 0.52.0", ] @@ -3092,9 +4042,9 @@ dependencies = [ [[package]] name = "icu_locid_transform_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" +checksum = "7515e6d781098bf9f7205ab3fc7e9709d34554ae0b21ddbcb5febfa4bc7df11d" [[package]] name = "icu_normalizer" @@ -3116,9 +4066,9 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" +checksum = "c5e8338228bdc8ab83303f16b797e177953730f601a96c25d10cb3ab0daa0cb7" [[package]] name = "icu_properties" @@ -3137,9 +4087,9 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" +checksum = "85fb8799753b75aee8d2a21d7c14d9f38921b54b3dbda10f5a3c7a7b82dba5e2" [[package]] name = "icu_provider" @@ -3244,6 +4194,12 @@ dependencies = [ 
"cfg-if", ] +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + [[package]] name = "ipnet" version = "2.11.0" @@ -3271,6 +4227,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -3337,6 +4302,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.77" @@ -3423,6 +4397,70 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "lexical-core" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +dependencies = [ + "lexical-util", + "static_assertions", +] + [[package]] name = "libappindicator" version = "0.9.0" @@ -3581,9 +4619,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.26" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "longest-increasing-subsequence" @@ 
-3600,6 +4638,26 @@ dependencies = [ "hashbrown 0.12.3", ] +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "mac" version = "0.1.1" @@ -4014,6 +5072,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -4264,10 +5323,31 @@ dependencies = [ ] [[package]] -name = "once_cell" -version = "1.21.1" +name = "object_store" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc" +checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools 0.13.0", + "parking_lot 0.12.3", + "percent-encoding", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "opaque-debug" @@ -4407,6 +5487,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + [[package]] name = "ordered-multimap" version = "0.7.3" @@ -4518,6 +5607,51 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "parquet" +version = "54.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfb15796ac6f56b429fd99e33ba133783ad75b27c36b4b5ce06f1f82cc97754e" +dependencies = [ + "ahash 0.8.11", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64 0.22.1", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.15.2", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", +] + +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + [[package]] name = "password-hash" version = "0.5.0" @@ -4593,9 +5727,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" -version = "2.7.15" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc" +checksum = "198db74531d58c70a361c42201efde7e2591e976d518caf7662a47dc5720e7b6" dependencies = [ "memchr", "thiserror 2.0.12", @@ -4604,9 +5738,9 @@ dependencies = [ 
[[package]] name = "pest_derive" -version = "2.7.15" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e" +checksum = "d725d9cfd79e87dccc9341a2ef39d1b6f6353d68c4b33c177febbe1a402c97c5" dependencies = [ "pest", "pest_generator", @@ -4614,9 +5748,9 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.15" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b" +checksum = "db7d01726be8ab66ab32f9df467ae8b1148906685bbe75c82d1e65d7f5b3f841" dependencies = [ "pest", "pest_meta", @@ -4627,9 +5761,9 @@ dependencies = [ [[package]] name = "pest_meta" -version = "2.7.15" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea" +checksum = "7f9f832470494906d1fca5329f8ab5791cc60beb230c74815dff541cbd2b5ca0" dependencies = [ "once_cell", "pest", @@ -4666,6 +5800,15 @@ dependencies = [ "phf_shared 0.10.0", ] +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared 0.11.3", +] + [[package]] name = "phf_codegen" version = "0.8.0" @@ -4686,6 +5829,16 @@ dependencies = [ "phf_shared 0.10.0", ] +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + [[package]] name = "phf_generator" version = "0.8.0" @@ -4830,7 +5983,7 @@ dependencies = [ "crypto", "futures", "ipnetwork", - "itertools", + "itertools 0.14.0", "jsonwebtoken", "lazy_static", "log", @@ -4909,7 +6062,7 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy", + "zerocopy 0.8.24", ] [[package]] @@ -5039,7 +6192,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck 0.5.0", - "itertools", + "itertools 0.14.0", "log", "multimap", "once_cell", @@ -5059,7 +6212,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.100", @@ -5116,10 +6269,36 @@ dependencies = [ ] [[package]] -name = "quick-xml" -version = "0.37.2" +name = "psm" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003" +checksum = "f58e5423e24c18cc840e1c98370b3993c6649cd1678b4d24318bcf0a083cbe88" +dependencies = [ + "cc", +] + +[[package]] +name = "query" +version = "0.0.1" +dependencies = [ + "api", + "async-recursion", + "async-trait", + "datafusion", + "derive_builder", + "futures", + "parking_lot 0.12.3", + "s3s", + "snafu", + "tokio", + "tracing", +] + +[[package]] +name = "quick-xml" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bf763ab1c7a3aa408be466efc86efe35ed1bd3dd74173ed39d6b0d0a6f0ba148" dependencies = [ "memchr", "serde", @@ -5127,11 +6306,12 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" +checksum = "c3bd15a6f2967aef83887dcb9fec0014580467e33720d073560cf015a5683012" dependencies = [ "bytes", + "cfg_aliases", "pin-project-lite", "quinn-proto", "quinn-udp", @@ -5141,17 +6321,18 @@ dependencies = [ "thiserror 2.0.12", "tokio", "tracing", + "web-time", ] [[package]] name = "quinn-proto" -version = "0.11.9" +version = "0.11.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" +checksum = "b820744eb4dc9b57a3398183639c511b5a26d2ed702cedd3febaa1393caa22cc" dependencies = [ "bytes", - "getrandom 0.2.15", - "rand 0.8.5", + "getrandom 0.3.2", + "rand 0.9.0", "ring", "rustc-hash 2.1.1", "rustls", @@ -5165,9 +6346,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.10" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944" +checksum = "541d0f57c6ec747a90738a52741d3221f7960e8ac2f0ff4b1a63680e033b4ab5" dependencies = [ "cfg_aliases", "libc", @@ -5225,7 +6406,7 @@ checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", - "zerocopy", + "zerocopy 0.8.24", ] [[package]] @@ -5345,6 +6526,26 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.100", +] + [[package]] name = "redox_syscall" version = "0.2.16" @@ -5446,9 +6647,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.12.14" +version = "0.12.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "989e327e510263980e231de548a33e63d34962d29ae61b467389a1a09627a254" +checksum = "d19c46a6fdd48bc4dab94b6103fccc55d34c67cc0ad04653aad4ea2a07cd7bbb" dependencies = [ "base64 0.22.1", "bytes", @@ -5666,6 +6867,7 @@ dependencies = [ name = "rustfs" version = "0.1.0" dependencies = [ + "api", "async-trait", "atoi", "axum", @@ -5675,6 +6877,8 @@ dependencies = [ "common", "const-str", "crypto", + "csv", + "datafusion", "ecstore", "flatbuffers", "futures", @@ -5701,6 +6905,7 @@ dependencies = [ "prost-types", "protobuf 3.7.2", "protos", + "query", "rmp-serde", "rust-embed", "rustfs-obs", @@ -5792,9 +6997,9 @@ dependencies = [ [[package]] name = "rustix" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7178faa4b75a30e269c71e61c353ce2748cf3d76f0c44c393f4e60abf49b825" +checksum = "e56a18552996ac8d29ecc3b190b4fdbb2d91ca4ec396de7bbffaf43f3d637e96" dependencies = [ "bitflags 2.9.0", "errno", @@ -5838,9 +7043,9 @@ dependencies = [ [[package]] name = 
"rustls-webpki" -version = "0.103.0" +version = "0.103.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0aa4eeac2588ffff23e9d7a7e9b3f971c5fb5b7ebc9452745e0c232c64f83b2f" +checksum = "fef8b8769aaccf73098557a87cd1816b4f9c7c16811c9c77142aa695c16f2c03" dependencies = [ "ring", "rustls-pki-types", @@ -6003,6 +7208,12 @@ dependencies = [ "futures-core", ] +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + [[package]] name = "serde" version = "1.0.219" @@ -6107,7 +7318,7 @@ checksum = "4fae7a3038a32e5a34ba32c6c45eb4852f8affaf8b794ebfcd4b1099e2d62ebe" dependencies = [ "bytes", "const_format", - "dashmap", + "dashmap 5.5.3", "futures", "gloo-net", "http", @@ -6351,10 +7562,38 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" [[package]] -name = "socket2" -version = "0.5.8" +name = "snafu" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" +dependencies = [ + "backtrace", + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.100", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "socket2" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" dependencies = [ "libc", "windows-sys 0.52.0", @@ -6392,12 +7631,47 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "sqlparser" +version = "0.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899" +dependencies = [ + "log", + "recursive", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stacker" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601f9201feb9b09c00266478bf459952b9ef9a6b94edb2f21eba14ab681a60a9" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -6406,9 +7680,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "std-next" -version = "0.1.5" +version = "0.1.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bafdb55260d9b29c04fa52351e0db2a4aaeadc462cd884ccd7771c5a31aaf1aa" +checksum = "1258af3e6890fc6369f89e9d5c052c3406707aef3f8e836697155fb6e07153bf" dependencies = [ "simdutf8", "thiserror 2.0.12", @@ -6416,9 +7690,9 @@ dependencies = [ [[package]] name = "string_cache" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938d512196766101d333398efde81bc1f37b00cb42c2f8350e5df639f040bbbe" +checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" dependencies = [ "new_debug_unreachable", "parking_lot 0.12.3", @@ -6608,14 +7882,14 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.19.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488960f40a3fd53d72c2a29a58722561dee8afdd175bd88e3db4677d7b2ba600" +checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" dependencies = [ "fastrand", "getrandom 0.3.2", "once_cell", - "rustix 1.0.2", + "rustix 1.0.3", "windows-sys 0.59.0", ] @@ -6720,10 +7994,21 @@ dependencies = [ ] [[package]] -name = "time" -version = "0.3.40" +name = "thrift" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d9c75b47bdff86fa3334a3db91356b8d7d86a9b839dab7d0bdc5c3d3a077618" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "time" +version = "0.3.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" dependencies = [ "deranged", "itoa 1.0.15", @@ -6744,9 +8029,9 @@ checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" [[package]] name = "time-macros" -version = "0.2.21" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29aa485584182073ed57fd5004aa09c371f021325014694e432313345865fd04" +checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" dependencies = [ "num-conv", "time-core", @@ -7216,6 +8501,16 @@ dependencies = [ "utf-8", ] +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + [[package]] name = "typenum" version = "1.18.0" @@ -7257,6 +8552,12 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -7328,9 +8629,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ "getrandom 0.3.2", + "js-sys", "rand 0.9.0", "serde", "uuid-macro-internal", + "wasm-bindgen", ] [[package]] @@ -7733,9 +9036,9 @@ dependencies = [ [[package]] name = "windows-link" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" [[package]] name = "windows-registry" @@ -7743,7 +9046,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" dependencies = [ - "windows-result 0.3.1", + "windows-result 0.3.2", "windows-strings 0.3.1", "windows-targets 0.53.0", ] @@ -7759,9 +9062,9 @@ dependencies = [ [[package]] name = "windows-result" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06374efe858fab7e4f881500e6e86ec8bc28f9462c47e5a9941a0142ad86b189" +checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" dependencies = [ "windows-link", ] @@ -7885,9 +9188,9 @@ dependencies = [ [[package]] name = "windows-version" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bfbcc4996dd183ff1376a20ade1242da0d2dcaff83cc76710a588d24fd4c5db" +checksum = "e04a5c6627e310a23ad2358483286c7df260c964eb2d003d8efd6d0f4e79265c" dependencies = [ "windows-link", ] @@ -8198,10 +9501,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" [[package]] -name = "yaml-rust2" -version = "0.10.0" +name = "xz2" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "232bdb534d65520716bef0bbb205ff8f2db72d807b19c0bc3020853b92a0cd4b" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "yaml-rust2" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "818913695e83ece1f8d2a1c52d54484b7b46d0f9c06beeb2649b9da50d9b512d" dependencies = [ "arraydeque", "encoding_rs", @@ -8348,18 +9660,38 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.23" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd97444d05a4328b90e75e503a34bad781f14e28a823ad3557f0750df1ebcbc6" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ - "zerocopy-derive", + "zerocopy-derive 0.7.35", +] + +[[package]] +name = "zerocopy" +version = "0.8.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" +dependencies = [ + "zerocopy-derive 0.8.24", ] [[package]] name = "zerocopy-derive" -version = "0.8.23" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6352c01d0edd5db859a63e2605f4ea3183ddbd15e2c4a9e7d32184df75e4f154" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be" dependencies = [ "proc-macro2", "quote", @@ -8415,6 +9747,34 @@ dependencies = [ "syn 2.0.100", ] +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] 
+name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.15+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "zvariant" version = "4.2.0" diff --git a/Cargo.toml b/Cargo.toml index 40dd5159..665a4d7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ members = [ "crypto", # Cryptography and security features "cli/rustfs-gui", # Graphical user interface client "packages/obs", # Observability utilities + "s3select/api", + "s3select/query", ] resolver = "2" @@ -30,6 +32,7 @@ all = "warn" [workspace.dependencies] madmin = { path = "./madmin" } +async-recursion = "1.0.5" async-trait = "0.1.87" backon = "1.3.0" bytes = "1.9.0" @@ -37,6 +40,8 @@ bytesize = "1.3.0" chrono = { version = "0.4.40", features = ["serde"] } clap = { version = "4.5.31", features = ["derive", "env"] } config = "0.15.9" +datafusion = "46.0.0" +derive_builder = "0.20.2" dioxus = { version = "0.6.3", features = ["router"] } dirs = "6.0.0" ecstore = { path = "./ecstore" } @@ -44,7 +49,7 @@ flatbuffers = "24.12.23" futures = "0.3.31" futures-util = "0.3.31" common = { path = "./common/common" } -policy = {path = "./policy"} +policy = { path = "./policy" } hex = "0.4.3" hyper = "1.6.0" hyper-util = { version = "0.1.10", features = [ @@ -107,6 +112,7 @@ tonic = { version = "0.12.3", features = ["gzip"] } tonic-build = "0.12.3" tonic-reflection = "0.12" tokio-stream = "0.1.17" +tokio-util = { version = "0.7.13", features = ["io", "compat"] } tower = { version = "0.5.2", features = ["timeout"] } tracing = "0.1.41" tracing-core = "0.1.33" @@ -126,6 +132,8 @@ axum = "0.7.9" md-5 = "0.10.6" workers = { path = "./common/workers" } test-case = "3.3.1" +zip = "2.2.3" +snafu = "0.8.5" [profile.wasm-dev] inherits = "dev" diff --git a/ecstore/src/io.rs b/ecstore/src/io.rs index 764c8834..3ca27fe0 100644 --- a/ecstore/src/io.rs +++ b/ecstore/src/io.rs @@ -140,10 +140,14 @@ impl EtagReader { impl AsyncRead for EtagReader { fn poll_read(mut self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>) -> Poll> { + let befor_size = buf.filled().len(); + match Pin::new(&mut self.inner).poll_read(cx, buf) { Poll::Ready(Ok(())) => { - let bytes = buf.filled(); - self.md5.update(bytes); + if buf.filled().len() > befor_size { + let bytes = &buf.filled()[befor_size..]; + self.md5.update(bytes); + } Poll::Ready(Ok(())) } diff --git a/ecstore/src/utils/os/linux.rs b/ecstore/src/utils/os/linux.rs index ca64f818..064b74ae 100644 --- a/ecstore/src/utils/os/linux.rs +++ b/ecstore/src/utils/os/linux.rs @@ -151,7 +151,7 @@ fn read_drive_stats(stats_file: &str) -> Result { fn read_stat(file_name: &str) -> Result> { // 打开文件 let path = Path::new(file_name); - let file = File::open(&path)?; + let file = File::open(path)?; // 创建一个 BufReader let reader = io::BufReader::new(file); @@ -161,7 +161,8 @@ fn read_stat(file_name: &str) -> Result> { if let Some(line) = reader.lines().next() { let line = line?; // 分割行并解析为 u64 - for token in line.trim().split_whitespace() { + // https://rust-lang.github.io/rust-clippy/master/index.html#trim_split_whitespace + for token in line.split_whitespace() { let ui64: u64 = token.parse()?; stats.push(ui64); } diff --git a/rustfs/Cargo.toml 
                 Poll::Ready(Ok(()))
             }
diff --git a/ecstore/src/utils/os/linux.rs b/ecstore/src/utils/os/linux.rs
index ca64f818..064b74ae 100644
--- a/ecstore/src/utils/os/linux.rs
+++ b/ecstore/src/utils/os/linux.rs
@@ -151,7 +151,7 @@ fn read_drive_stats(stats_file: &str) -> Result<DriveStats> {
 fn read_stat(file_name: &str) -> Result<Vec<u64>> {
     // Open the file
     let path = Path::new(file_name);
-    let file = File::open(&path)?;
+    let file = File::open(path)?;
     // Create a BufReader
     let reader = io::BufReader::new(file);
@@ -161,7 +161,8 @@ fn read_stat(file_name: &str) -> Result<Vec<u64>> {
     if let Some(line) = reader.lines().next() {
         let line = line?;
         // Split the line and parse each token as u64
-        for token in line.trim().split_whitespace() {
+        // https://rust-lang.github.io/rust-clippy/master/index.html#trim_split_whitespace
+        for token in line.split_whitespace() {
             let ui64: u64 = token.parse()?;
             stats.push(ui64);
         }
diff --git a/rustfs/Cargo.toml b/rustfs/Cargo.toml
index 6bc9881f..62f504d9 100644
--- a/rustfs/Cargo.toml
+++ b/rustfs/Cargo.toml
@@ -20,6 +20,8 @@ madmin.workspace = true
 async-trait.workspace = true
 bytes.workspace = true
 clap.workspace = true
+csv = "1.3.1"
+datafusion = { workspace = true }
 common.workspace = true
 ecstore.workspace = true
 policy.workspace =true
@@ -46,7 +48,7 @@ serde.workspace = true
 serde_json.workspace = true
 tracing.workspace = true
 time = { workspace = true, features = ["parsing", "formatting", "serde"] }
-tokio-util = { version = "0.7.13", features = ["io", "compat"] }
+tokio-util.workspace = true
 tokio = { workspace = true, features = [
     "rt-multi-thread",
     "macros",
@@ -71,6 +73,8 @@ const-str = { version = "0.6.1", features = ["std", "proc"] }
 atoi = "2.0.0"
 serde_urlencoded = "0.7.1"
 crypto = { path = "../crypto" }
+query = { path = "../s3select/query" }
+api = { path = "../s3select/api" }
 iam = { path = "../iam" }
 jsonwebtoken = "9.3.0"
 tower-http = { version = "0.6.2", features = ["cors"] }
diff --git a/rustfs/src/auth.rs b/rustfs/src/auth.rs
index 42d4a464..3d042553 100644
--- a/rustfs/src/auth.rs
+++ b/rustfs/src/auth.rs
@@ -113,15 +113,15 @@ pub fn check_claims_from_token(header: &HeaderMap, cred: &auth::Credentials) ->
     }
 
     if token.is_empty() && cred.is_temp() && !cred.is_service_account() {
-        return Err(s3_error!(InvalidRequest, "invalid token"));
+        return Err(s3_error!(InvalidRequest, "invalid token: missing session token for temporary credentials"));
     }
 
     if !token.is_empty() && !cred.is_temp() {
-        return Err(s3_error!(InvalidRequest, "invalid token"));
+        return Err(s3_error!(InvalidRequest, "invalid token: session token supplied for non-temporary credentials"));
     }
 
     if !cred.is_service_account() && cred.is_temp() && token != cred.session_token {
-        return Err(s3_error!(InvalidRequest, "invalid token"));
+        return Err(s3_error!(InvalidRequest, "invalid token: session token mismatch"));
     }
 
     if cred.is_temp() && cred.is_expired() {
diff --git a/rustfs/src/storage/ecfs.rs b/rustfs/src/storage/ecfs.rs
index 34549305..f517220a 100644
--- a/rustfs/src/storage/ecfs.rs
+++ b/rustfs/src/storage/ecfs.rs
@@ -4,8 +4,14 @@ use super::options::extract_metadata;
 use super::options::put_opts;
 use crate::auth::get_condition_values;
 use crate::storage::access::ReqInfo;
+use api::query::Context;
+use api::query::Query;
+use api::server::dbms::DatabaseManagerSystem;
 use bytes::Bytes;
 use common::error::Result;
+use datafusion::arrow::csv::WriterBuilder as CsvWriterBuilder;
+use datafusion::arrow::json::writer::JsonArray;
+use datafusion::arrow::json::WriterBuilder as JsonWriterBuilder;
 use ecstore::bucket::error::BucketMetadataError;
 use ecstore::bucket::metadata::BUCKET_LIFECYCLE_CONFIG;
 use ecstore::bucket::metadata::BUCKET_NOTIFICATION_CONFIG;
@@ -46,6 +52,7 @@ use policy::policy::action::S3Action;
 use policy::policy::BucketPolicy;
 use policy::policy::BucketPolicyArgs;
 use policy::policy::Validator;
+use query::instance::make_rustfsms;
 use s3s::dto::*;
 use s3s::s3_error;
 use s3s::S3Error;
@@ -55,6 +62,8 @@ use s3s::S3;
 use s3s::{S3Request, S3Response};
 use std::fmt::Debug;
 use std::str::FromStr;
+use tokio::sync::mpsc;
+use tokio_stream::wrappers::ReceiverStream;
 use tokio_util::io::ReaderStream;
 use tokio_util::io::StreamReader;
 use tracing::debug;
@@ -1859,6 +1868,69 @@ impl S3 for FS {
         }
         Ok(S3Response::new(PutObjectAclOutput::default()))
     }
+
+    async fn select_object_content(
+        &self,
+        req: S3Request<SelectObjectContentInput>,
+    ) -> S3Result<S3Response<SelectObjectContentOutput>> {
+        info!("handle select_object_content");
+
+        let input = req.input;
+        info!("{:?}", input);
+
+        let db = make_rustfsms(input.clone(), false).await.map_err(|e| {
+            error!("make db failed, {}", e.to_string());
+            s3_error!(InternalError)
+        })?;
+        let query = Query::new(Context { input: input.clone() }, input.request.expression);
+        let result = db.execute(&query).await.map_err(|_| s3_error!(InternalError))?;
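+
+        // collect the full result set in memory, then encode it in the requested output
+        // format; fine for typical S3 Select payloads, though a streaming encoder would
+        // scale better for large results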
+        let results = result.result().chunk_result().await.map_err(|_| s3_error!(InternalError))?.to_vec();
+
+        let mut buffer = Vec::new();
+        if input.request.output_serialization.csv.is_some() {
+            let mut csv_writer = CsvWriterBuilder::new().with_header(false).build(&mut buffer);
+            for batch in results {
+                csv_writer
+                    .write(&batch)
+                    .map_err(|e| s3_error!(InternalError, "can't encode output to csv. e: {}", e.to_string()))?;
+            }
+        } else if input.request.output_serialization.json.is_some() {
+            let mut json_writer = JsonWriterBuilder::new()
+                .with_explicit_nulls(true)
+                .build::<_, JsonArray>(&mut buffer);
+            for batch in results {
+                json_writer
+                    .write(&batch)
+                    .map_err(|e| s3_error!(InternalError, "can't encode output to json. e: {}", e.to_string()))?;
+            }
+            json_writer
+                .finish()
+                .map_err(|e| s3_error!(InternalError, "can't finish the json output. e: {}", e.to_string()))?;
+        } else {
+            return Err(s3_error!(InvalidArgument, "unknown output format"));
+        }
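+
+        // s3s delivers the response as an event stream: a Cont event, the encoded payload
+        // in a single Records event, then End (no Progress/Stats events are emitted here)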
err: {}", err))] + BuildQueryDispatcher { err: String }, + + #[snafu(display("The query has been canceled"))] + Cancel, + + #[snafu(display("{}", source))] + Parser { source: ParserError }, + + #[snafu(display("Udf not exists, name:{}.", name))] + FunctionNotExists { name: String }, + + #[snafu(display("Udf already exists, name:{}.", name))] + FunctionExists { name: String }, + + #[snafu(display("Store Error, e:{}.", e))] + StoreError { e: String }, +} + +impl From for QueryError { + fn from(value: DataFusionError) -> Self { + match value { + DataFusionError::External(e) if e.downcast_ref::().is_some() => *e.downcast::().unwrap(), + + v => Self::Datafusion { + source: v, + location: Default::default(), + backtrace: Backtrace::capture(), + }, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ResolvedTable { + // path + table: String, +} + +impl ResolvedTable { + pub fn table(&self) -> &str { + &self.table + } +} + +impl Display for ResolvedTable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { table } = self; + write!(f, "{table}") + } +} diff --git a/s3select/api/src/object_store.rs b/s3select/api/src/object_store.rs new file mode 100644 index 00000000..7772936c --- /dev/null +++ b/s3select/api/src/object_store.rs @@ -0,0 +1,177 @@ +use async_trait::async_trait; +use bytes::Bytes; +use chrono::Utc; +use ecstore::io::READ_BUFFER_SIZE; +use ecstore::new_object_layer_fn; +use ecstore::store::ECStore; +use ecstore::store_api::ObjectIO; +use ecstore::store_api::ObjectOptions; +use ecstore::StorageAPI; +use futures::pin_mut; +use futures::{Stream, StreamExt}; +use futures_core::stream::BoxStream; +use http::HeaderMap; +use object_store::path::Path; +use object_store::Attributes; +use object_store::GetOptions; +use object_store::GetResult; +use object_store::ListResult; +use object_store::MultipartUpload; +use object_store::ObjectMeta; +use object_store::ObjectStore; +use object_store::PutMultipartOpts; +use object_store::PutOptions; +use object_store::PutPayload; +use object_store::PutResult; +use object_store::{Error as o_Error, Result}; +use s3s::dto::SelectObjectContentInput; +use s3s::s3_error; +use s3s::S3Result; +use std::ops::Range; +use std::sync::Arc; +use tokio_util::io::ReaderStream; +use tracing::info; +use transform_stream::AsyncTryStream; + +#[derive(Debug)] +pub struct EcObjectStore { + input: SelectObjectContentInput, + + store: Arc, +} + +impl EcObjectStore { + pub fn new(input: SelectObjectContentInput) -> S3Result { + let Some(store) = new_object_layer_fn() else { + return Err(s3_error!(InternalError, "ec store not inited")); + }; + + Ok(Self { input, store }) + } +} + +impl std::fmt::Display for EcObjectStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("EcObjectStore") + } +} + +#[async_trait] +impl ObjectStore for EcObjectStore { + async fn put_opts(&self, _location: &Path, _payload: PutPayload, _opts: PutOptions) -> Result { + unimplemented!() + } + + async fn put_multipart_opts(&self, _location: &Path, _opts: PutMultipartOpts) -> Result> { + unimplemented!() + } + + async fn get_opts(&self, location: &Path, _options: GetOptions) -> Result { + info!("{:?}", location); + let opts = ObjectOptions::default(); + let h = HeaderMap::new(); + let reader = self + .store + .get_object_reader(&self.input.bucket, &self.input.key, None, h, &opts) + .await + .map_err(|_| o_Error::NotFound { + path: format!("{}/{}", self.input.bucket, self.input.key), + source: "can not get object info".into(), 
+        let meta = ObjectMeta {
+            location: location.clone(),
+            last_modified: Utc::now(),
+            size: reader.object_info.size,
+            e_tag: reader.object_info.etag,
+            version: None,
+        };
+        let attributes = Attributes::default();
+
+        Ok(GetResult {
+            payload: object_store::GetResultPayload::Stream(
+                bytes_stream(ReaderStream::with_capacity(reader.stream, READ_BUFFER_SIZE), reader.object_info.size).boxed(),
+            ),
+            meta,
+            range: 0..reader.object_info.size,
+            attributes,
+        })
+    }
+
+    async fn get_ranges(&self, _location: &Path, _ranges: &[Range<usize>]) -> Result<Vec<Bytes>> {
+        unimplemented!()
+    }
+
+    async fn head(&self, location: &Path) -> Result<ObjectMeta> {
+        info!("{:?}", location);
+        let opts = ObjectOptions::default();
+        let info = self
+            .store
+            .get_object_info(&self.input.bucket, &self.input.key, &opts)
+            .await
+            .map_err(|_| o_Error::NotFound {
+                path: format!("{}/{}", self.input.bucket, self.input.key),
+                source: "cannot get object info".into(),
+            })?;
+
+        Ok(ObjectMeta {
+            location: location.clone(),
+            last_modified: Utc::now(),
+            size: info.size,
+            e_tag: info.etag,
+            version: None,
+        })
+    }
+
+    async fn delete(&self, _location: &Path) -> Result<()> {
+        unimplemented!()
+    }
+
+    fn list(&self, _prefix: Option<&Path>) -> BoxStream<'_, Result<ObjectMeta>> {
+        unimplemented!()
+    }
+
+    async fn list_with_delimiter(&self, _prefix: Option<&Path>) -> Result<ListResult> {
+        unimplemented!()
+    }
+
+    async fn copy(&self, _from: &Path, _to: &Path) -> Result<()> {
+        unimplemented!()
+    }
+
+    async fn copy_if_not_exists(&self, _from: &Path, _to: &Path) -> Result<()> {
+        unimplemented!()
+    }
+}
+
+pub fn bytes_stream<S>(stream: S, content_length: usize) -> impl Stream<Item = Result<Bytes>> + Send + 'static
+where
+    S: Stream<Item = std::io::Result<Bytes>> + Send + 'static,
+{
+    AsyncTryStream::<Bytes, o_Error, _>::new(|mut y| async move {
+        pin_mut!(stream);
+        let mut remaining: usize = content_length;
+        while let Some(result) = stream.next().await {
+            let mut bytes = result.map_err(|e| o_Error::Generic {
+                store: "",
+                source: Box::new(e),
+            })?;
+            if bytes.len() > remaining {
+                bytes.truncate(remaining);
+            }
+            remaining -= bytes.len();
+            y.yield_ok(bytes).await;
+        }
+        Ok(())
+    })
+}
diff --git a/s3select/api/src/query/analyzer.rs b/s3select/api/src/query/analyzer.rs
new file mode 100644
index 00000000..db849566
--- /dev/null
+++ b/s3select/api/src/query/analyzer.rs
@@ -0,0 +1,12 @@
+use std::sync::Arc;
+
+use datafusion::logical_expr::LogicalPlan;
+
+use super::session::SessionCtx;
+use crate::QueryResult;
+
+pub type AnalyzerRef = Arc<dyn Analyzer + Send + Sync>;
+
+pub trait Analyzer {
+    fn analyze(&self, plan: &LogicalPlan, session: &SessionCtx) -> QueryResult<LogicalPlan>;
+}
diff --git a/s3select/api/src/query/ast.rs b/s3select/api/src/query/ast.rs
new file mode 100644
index 00000000..dbe9b4b2
--- /dev/null
+++ b/s3select/api/src/query/ast.rs
@@ -0,0 +1,8 @@
+use datafusion::sql::sqlparser::ast::Statement;
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum ExtStatement {
+    /// ANSI SQL AST node
+    SqlStatement(Box<Statement>),
+    // we can extend this with custom commands later
+}
diff --git a/s3select/api/src/query/datasource/mod.rs b/s3select/api/src/query/datasource/mod.rs
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/s3select/api/src/query/datasource/mod.rs
@@ -0,0 +1 @@
+
diff --git a/s3select/api/src/query/dispatcher.rs b/s3select/api/src/query/dispatcher.rs
new file mode 100644
index 00000000..433ddf01
--- /dev/null
+++ b/s3select/api/src/query/dispatcher.rs
@@ -0,0 +1,32 @@
+use std::sync::Arc;
+
+use async_trait::async_trait;
+
+use crate::QueryResult;
+
+use super::{
+    execution::{Output, QueryStateMachine},
+    logical_planner::Plan,
+    Query,
+};
+
+#[async_trait]
+pub trait QueryDispatcher: Send + Sync {
+    // fn create_query_id(&self) -> QueryId;
+
+    // fn query_info(&self, id: &QueryId);
+
+    async fn execute_query(&self, query: &Query) -> QueryResult<Output>;
+
+    async fn build_logical_plan(&self, query_state_machine: Arc<QueryStateMachine>) -> QueryResult<Option<Plan>>;
+
+    async fn execute_logical_plan(&self, logical_plan: Plan, query_state_machine: Arc<QueryStateMachine>) -> QueryResult<Output>;
+
+    async fn build_query_state_machine(&self, query: Query) -> QueryResult<Arc<QueryStateMachine>>;
+
+    // fn running_query_infos(&self) -> Vec<QueryInfo>;
+
+    // fn running_query_status(&self) -> Vec<QueryStatus>;
+
+    // fn cancel_query(&self, id: &QueryId);
+}
diff --git a/s3select/api/src/query/execution.rs b/s3select/api/src/query/execution.rs
new file mode 100644
index 00000000..10c48acc
--- /dev/null
+++ b/s3select/api/src/query/execution.rs
@@ -0,0 +1,241 @@
+use std::fmt::Display;
+use std::pin::Pin;
+use std::sync::atomic::{AtomicPtr, Ordering};
+use std::sync::Arc;
+use std::task::{Context, Poll};
+use std::time::{Duration, Instant};
+
+use async_trait::async_trait;
+use datafusion::arrow::datatypes::{Schema, SchemaRef};
+use datafusion::arrow::record_batch::RecordBatch;
+use datafusion::physical_plan::SendableRecordBatchStream;
+use futures::{Stream, StreamExt, TryStreamExt};
+
+use crate::{QueryError, QueryResult};
+
+use super::logical_planner::Plan;
+use super::session::SessionCtx;
+use super::Query;
+
+pub type QueryExecutionRef = Arc<dyn QueryExecution>;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum QueryType {
+    Batch,
+    Stream,
+}
+
+impl Display for QueryType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Batch => write!(f, "batch"),
+            Self::Stream => write!(f, "stream"),
+        }
+    }
+}
+
+#[async_trait]
+pub trait QueryExecution: Send + Sync {
+    fn query_type(&self) -> QueryType {
+        QueryType::Batch
+    }
+    // start execution
+    async fn start(&self) -> QueryResult<Output>;
+    // stop execution
+    fn cancel(&self) -> QueryResult<()>;
+}
+
+pub enum Output {
+    StreamData(SendableRecordBatchStream),
+    Nil(()),
+}
+
+impl Output {
+    pub fn schema(&self) -> SchemaRef {
+        match self {
+            Self::StreamData(stream) => stream.schema(),
+            Self::Nil(_) => Arc::new(Schema::empty()),
+        }
+    }
+
+    pub async fn chunk_result(self) -> QueryResult<Vec<RecordBatch>> {
+        match self {
+            Self::Nil(_) => Ok(vec![]),
+            Self::StreamData(stream) => {
+                let schema = stream.schema();
+                let mut res: Vec<RecordBatch> = stream.try_collect::<Vec<_>>().await?;
+                if res.is_empty() {
+                    res.push(RecordBatch::new_empty(schema));
+                }
+                Ok(res)
+            }
+        }
+    }
+
+    pub async fn num_rows(self) -> usize {
+        match self.chunk_result().await {
+            Ok(rb) => rb.iter().map(|e| e.num_rows()).sum(),
+            Err(_) => 0,
+        }
+    }
+
+    /// Returns the number of records affected by the query operation
+    ///
+    /// If it is a select statement, returns the number of rows in the result set
+    ///
+    /// -1 means unknown
+    ///
+    /// Note: the cast is unchecked; a row count above i64::MAX would wrap
+    pub async fn affected_rows(self) -> i64 {
+        self.num_rows().await as i64
+    }
+}
+
+impl Stream for Output {
+    type Item = std::result::Result<RecordBatch, QueryError>;
+
+    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        let this = self.get_mut();
+        match this {
+            Output::StreamData(stream) => stream.poll_next_unpin(cx).map_err(|e| e.into()),
+            Output::Nil(_) => Poll::Ready(None),
+        }
+    }
+}
+
+#[async_trait]
+pub trait QueryExecutionFactory {
+    async fn create_query_execution(
+        &self,
+        plan: Plan,
+        query_state_machine: QueryStateMachineRef,
+    ) -> QueryResult<QueryExecutionRef>;
+}
+
+pub type QueryStateMachineRef = Arc<QueryStateMachine>;
+
+pub struct QueryStateMachine {
+    pub session: SessionCtx,
+    pub query: Query,
+
+    state: AtomicPtr<QueryState>,
+    start: Instant,
+}
+
+impl QueryStateMachine {
+    pub fn begin(query: Query, session: SessionCtx) -> Self {
+        Self {
+            session,
+            query,
+            state: AtomicPtr::new(Box::into_raw(Box::new(QueryState::ACCEPTING))),
+            start: Instant::now(),
+        }
+    }
+
+    pub fn begin_analyze(&self) {
+        // TODO record time
+        self.translate_to(Box::new(QueryState::RUNNING(RUNNING::ANALYZING)));
+    }
+
+    pub fn end_analyze(&self) {
+        // TODO record time
+    }
+
+    pub fn begin_optimize(&self) {
+        // TODO record time
+        self.translate_to(Box::new(QueryState::RUNNING(RUNNING::OPTIMIZING)));
+    }
+
+    pub fn end_optimize(&self) {
+        // TODO
+    }
+
+    pub fn begin_schedule(&self) {
+        // TODO
+        self.translate_to(Box::new(QueryState::RUNNING(RUNNING::SCHEDULING)));
+    }
+
+    pub fn end_schedule(&self) {
+        // TODO
+    }
+
+    pub fn finish(&self) {
+        // TODO
+        self.translate_to(Box::new(QueryState::DONE(DONE::FINISHED)));
+    }
+
+    pub fn cancel(&self) {
+        // TODO
+        self.translate_to(Box::new(QueryState::DONE(DONE::CANCELLED)));
+    }
+
+    pub fn fail(&self) {
+        // TODO
+        self.translate_to(Box::new(QueryState::DONE(DONE::FAILED)));
+    }
+
+    pub fn state(&self) -> &QueryState {
+        unsafe { &*self.state.load(Ordering::Relaxed) }
+    }
+
+    pub fn duration(&self) -> Duration {
+        self.start.elapsed()
+    }
+
+    fn translate_to(&self, state: Box<QueryState>) {
+        // note: the old state Box is never reclaimed; freeing it here could invalidate
+        // references handed out by state(), at the cost of one small leak per transition
+        self.state.store(Box::into_raw(state), Ordering::Relaxed);
+    }
+}
+
+#[derive(Debug, Clone)]
+pub enum QueryState {
+    ACCEPTING,
+    RUNNING(RUNNING),
+    DONE(DONE),
+}
+
+impl AsRef<str> for QueryState {
+    fn as_ref(&self) -> &str {
+        match self {
+            QueryState::ACCEPTING => "ACCEPTING",
+            QueryState::RUNNING(e) => e.as_ref(),
+            QueryState::DONE(e) => e.as_ref(),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub enum RUNNING {
+    DISPATCHING,
+    ANALYZING,
+    OPTIMIZING,
+    SCHEDULING,
+}
+
+impl AsRef<str> for RUNNING {
+    fn as_ref(&self) -> &str {
+        match self {
+            Self::DISPATCHING => "DISPATCHING",
+            Self::ANALYZING => "ANALYZING",
+            Self::OPTIMIZING => "OPTIMIZING",
+            Self::SCHEDULING => "SCHEDULING",
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub enum DONE {
+    FINISHED,
+    FAILED,
+    CANCELLED,
+}
+
+impl AsRef<str> for DONE {
+    fn as_ref(&self) -> &str {
+        match self {
+            Self::FINISHED => "FINISHED",
+            Self::FAILED => "FAILED",
+            Self::CANCELLED => "CANCELLED",
+        }
+    }
+}
diff --git a/s3select/api/src/query/function.rs b/s3select/api/src/query/function.rs
new file mode 100644
index 00000000..af207fc1
--- /dev/null
+++ b/s3select/api/src/query/function.rs
@@ -0,0 +1,23 @@
+use std::collections::HashSet;
+use std::sync::Arc;
+
+use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF};
+
+use crate::QueryResult;
+
+pub type FuncMetaManagerRef = Arc<dyn FunctionMetadataManager + Send + Sync>;
+pub trait FunctionMetadataManager {
+    fn register_udf(&mut self, udf: ScalarUDF) -> QueryResult<()>;
+
+    fn register_udaf(&mut self, udaf: AggregateUDF) -> QueryResult<()>;
+
+    fn register_udwf(&mut self, udwf: WindowUDF) -> QueryResult<()>;
+
+    fn udf(&self, name: &str) -> QueryResult<Arc<ScalarUDF>>;
+
+    fn udaf(&self, name: &str) -> QueryResult<Arc<AggregateUDF>>;
+
+    fn udwf(&self, name: &str) -> QueryResult<Arc<WindowUDF>>;
+
+    fn udfs(&self) -> HashSet<String>;
+}
diff --git a/s3select/api/src/query/logical_planner.rs b/s3select/api/src/query/logical_planner.rs
new file mode 100644
index 00000000..cef844b3
--- /dev/null
+++ b/s3select/api/src/query/logical_planner.rs
@@ -0,0 +1,40 @@
+use async_trait::async_trait;
+use datafusion::arrow::datatypes::SchemaRef;
+use datafusion::logical_expr::LogicalPlan as DFPlan;
+
+use crate::QueryResult;
+
+use super::ast::ExtStatement;
+use super::session::SessionCtx;
+
+#[derive(Clone)]
+pub enum Plan {
+    // only query SQL is supported for now
+    /// Query plan
+    Query(QueryPlan),
+}
+
+impl Plan {
+    pub fn schema(&self) -> SchemaRef {
+        match self {
+            Self::Query(p) => SchemaRef::from(p.df_plan.schema().as_ref().to_owned()),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct QueryPlan {
+    pub df_plan: DFPlan,
+    pub is_tag_scan: bool,
+}
+
+impl QueryPlan {
+    pub fn is_explain(&self) -> bool {
+        matches!(self.df_plan, DFPlan::Explain(_) | DFPlan::Analyze(_))
+    }
+}
+
+#[async_trait]
+pub trait LogicalPlanner {
+    async fn create_logical_plan(&self, statement: ExtStatement, session: &SessionCtx) -> QueryResult<Plan>;
+}
diff --git a/s3select/api/src/query/mod.rs b/s3select/api/src/query/mod.rs
new file mode 100644
index 00000000..6ddd2dc8
--- /dev/null
+++ b/s3select/api/src/query/mod.rs
@@ -0,0 +1,41 @@
+use s3s::dto::SelectObjectContentInput;
+
+pub mod analyzer;
+pub mod ast;
+pub mod datasource;
+pub mod dispatcher;
+pub mod execution;
+pub mod function;
+pub mod logical_planner;
+pub mod optimizer;
+pub mod parser;
+pub mod physical_planner;
+pub mod scheduler;
+pub mod session;
+
+#[derive(Clone)]
+pub struct Context {
+    // we may need to carry more request info here later
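+    // (e.g. auth context, request headers, or tracing metadata, in addition to the input)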
+ pub input: SelectObjectContentInput, +} + +#[derive(Clone)] +pub struct Query { + context: Context, + content: String, +} + +impl Query { + #[inline(always)] + pub fn new(context: Context, content: String) -> Self { + Self { context, content } + } + + pub fn context(&self) -> &Context { + &self.context + } + + pub fn content(&self) -> &str { + self.content.as_str() + } +} diff --git a/s3select/api/src/query/optimizer.rs b/s3select/api/src/query/optimizer.rs new file mode 100644 index 00000000..c2392eb9 --- /dev/null +++ b/s3select/api/src/query/optimizer.rs @@ -0,0 +1,15 @@ +use std::sync::Arc; + +use async_trait::async_trait; +use datafusion::physical_plan::ExecutionPlan; + +use super::logical_planner::QueryPlan; +use super::session::SessionCtx; +use crate::QueryResult; + +pub type OptimizerRef = Arc; + +#[async_trait] +pub trait Optimizer { + async fn optimize(&self, plan: &QueryPlan, session: &SessionCtx) -> QueryResult>; +} diff --git a/s3select/api/src/query/parser.rs b/s3select/api/src/query/parser.rs new file mode 100644 index 00000000..76d7e723 --- /dev/null +++ b/s3select/api/src/query/parser.rs @@ -0,0 +1,8 @@ +use std::collections::VecDeque; + +use super::ast::ExtStatement; +use crate::QueryResult; + +pub trait Parser { + fn parse(&self, sql: &str) -> QueryResult>; +} diff --git a/s3select/api/src/query/physical_planner.rs b/s3select/api/src/query/physical_planner.rs new file mode 100644 index 00000000..c71787e9 --- /dev/null +++ b/s3select/api/src/query/physical_planner.rs @@ -0,0 +1,21 @@ +use std::sync::Arc; + +use async_trait::async_trait; +use datafusion::logical_expr::LogicalPlan; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_planner::ExtensionPlanner; + +use super::session::SessionCtx; +use crate::QueryResult; + +#[async_trait] +pub trait PhysicalPlanner { + /// Given a `LogicalPlan`, create an `ExecutionPlan` suitable for execution + async fn create_physical_plan( + &self, + logical_plan: &LogicalPlan, + session_state: &SessionCtx, + ) -> QueryResult>; + + fn inject_physical_transform_rule(&mut self, rule: Arc); +} diff --git a/s3select/api/src/query/scheduler.rs b/s3select/api/src/query/scheduler.rs new file mode 100644 index 00000000..3dd49c22 --- /dev/null +++ b/s3select/api/src/query/scheduler.rs @@ -0,0 +1,32 @@ +use std::sync::Arc; + +use async_trait::async_trait; +use datafusion::common::Result; +use datafusion::execution::context::TaskContext; +use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; + +pub type SchedulerRef = Arc; + +#[async_trait] +pub trait Scheduler { + /// Schedule the provided [`ExecutionPlan`] on this [`Scheduler`]. 
+ /// + /// Returns a [`ExecutionResults`] that can be used to receive results as they are produced, + /// as a [`futures::Stream`] of [`RecordBatch`] + async fn schedule(&self, plan: Arc, context: Arc) -> Result; +} + +pub struct ExecutionResults { + stream: SendableRecordBatchStream, +} + +impl ExecutionResults { + pub fn new(stream: SendableRecordBatchStream) -> Self { + Self { stream } + } + + /// Returns a [`SendableRecordBatchStream`] of this execution + pub fn stream(self) -> SendableRecordBatchStream { + self.stream + } +} diff --git a/s3select/api/src/query/session.rs b/s3select/api/src/query/session.rs new file mode 100644 index 00000000..c9d91f51 --- /dev/null +++ b/s3select/api/src/query/session.rs @@ -0,0 +1,86 @@ +use std::sync::Arc; + +use bytes::Bytes; +use datafusion::{ + execution::{context::SessionState, runtime_env::RuntimeEnvBuilder, SessionStateBuilder}, + prelude::SessionContext, +}; +use object_store::{memory::InMemory, path::Path, ObjectStore}; +use tracing::error; + +use crate::{object_store::EcObjectStore, QueryError, QueryResult}; + +use super::Context; + +#[derive(Clone)] +pub struct SessionCtx { + _desc: Arc, + inner: SessionState, +} + +impl SessionCtx { + pub fn inner(&self) -> &SessionState { + &self.inner + } +} + +#[derive(Clone)] +pub struct SessionCtxDesc { + // maybe we need some info +} + +#[derive(Default)] +pub struct SessionCtxFactory { + pub is_test: bool, +} + +impl SessionCtxFactory { + pub async fn create_session_ctx(&self, context: &Context) -> QueryResult { + let df_session_ctx = self.build_df_session_context(context).await?; + + Ok(SessionCtx { + _desc: Arc::new(SessionCtxDesc {}), + inner: df_session_ctx.state(), + }) + } + + async fn build_df_session_context(&self, context: &Context) -> QueryResult { + let path = format!("s3://{}", context.input.bucket); + let store_url = url::Url::parse(&path).unwrap(); + let rt = RuntimeEnvBuilder::new().build()?; + let df_session_state = SessionStateBuilder::new() + .with_runtime_env(Arc::new(rt)) + .with_default_features(); + + let df_session_state = if self.is_test { + let store: Arc = Arc::new(InMemory::new()); + let data = b"id,name,age,department,salary + 1,Alice,25,HR,5000 + 2,Bob,30,IT,6000 + 3,Charlie,35,Finance,7000 + 4,Diana,22,Marketing,4500 + 5,Eve,28,IT,5500 + 6,Frank,40,Finance,8000 + 7,Grace,26,HR,5200 + 8,Henry,32,IT,6200 + 9,Ivy,24,Marketing,4800 + 10,Jack,38,Finance,7500"; + let data_bytes = Bytes::from(data.to_vec()); + let path = Path::from(context.input.key.clone()); + store.put(&path, data_bytes.into()).await.map_err(|e| { + error!("put data into memory failed: {}", e.to_string()); + QueryError::StoreError { e: e.to_string() } + })?; + + df_session_state.with_object_store(&store_url, Arc::new(store)).build() + } else { + let store = + EcObjectStore::new(context.input.clone()).map_err(|_| QueryError::NotImplemented { err: String::new() })?; + df_session_state.with_object_store(&store_url, Arc::new(store)).build() + }; + + let df_session_ctx = SessionContext::new_with_state(df_session_state); + + Ok(df_session_ctx) + } +} diff --git a/s3select/api/src/server/dbms.rs b/s3select/api/src/server/dbms.rs new file mode 100644 index 00000000..85d32055 --- /dev/null +++ b/s3select/api/src/server/dbms.rs @@ -0,0 +1,41 @@ +use async_trait::async_trait; + +use crate::{ + query::{ + execution::{Output, QueryStateMachineRef}, + logical_planner::Plan, + Query, + }, + QueryResult, +}; + +pub struct QueryHandle { + query: Query, + result: Output, +} + +impl QueryHandle { + pub fn new(query: Query, 
result: Output) -> Self { + Self { query, result } + } + + pub fn query(&self) -> &Query { + &self.query + } + + pub fn result(self) -> Output { + self.result + } +} + +#[async_trait] +pub trait DatabaseManagerSystem { + async fn execute(&self, query: &Query) -> QueryResult; + async fn build_query_state_machine(&self, query: Query) -> QueryResult; + async fn build_logical_plan(&self, query_state_machine: QueryStateMachineRef) -> QueryResult>; + async fn execute_logical_plan( + &self, + logical_plan: Plan, + query_state_machine: QueryStateMachineRef, + ) -> QueryResult; +} diff --git a/s3select/api/src/server/mod.rs b/s3select/api/src/server/mod.rs new file mode 100644 index 00000000..c2e7c7b5 --- /dev/null +++ b/s3select/api/src/server/mod.rs @@ -0,0 +1 @@ +pub mod dbms; diff --git a/s3select/query/Cargo.toml b/s3select/query/Cargo.toml new file mode 100644 index 00000000..61b0b07b --- /dev/null +++ b/s3select/query/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "query" +version.workspace = true +edition.workspace = true + +[dependencies] +api = { path = "../api" } +async-recursion = { workspace = true } +async-trait.workspace = true +datafusion = { workspace = true } +derive_builder = { workspace = true } +futures = { workspace = true } +parking_lot = { version = "0.12.1" } +s3s.workspace = true +snafu = { workspace = true, features = ["backtrace"] } +tokio = { workspace = true } +tracing = { workspace = true } \ No newline at end of file diff --git a/s3select/query/src/data_source/mod.rs b/s3select/query/src/data_source/mod.rs new file mode 100644 index 00000000..b0704130 --- /dev/null +++ b/s3select/query/src/data_source/mod.rs @@ -0,0 +1 @@ +pub mod table_source; diff --git a/s3select/query/src/data_source/table_source.rs b/s3select/query/src/data_source/table_source.rs new file mode 100644 index 00000000..77df6e81 --- /dev/null +++ b/s3select/query/src/data_source/table_source.rs @@ -0,0 +1,138 @@ +use std::any::Any; +use std::borrow::Cow; +use std::fmt::Display; +use std::sync::Arc; +use std::write; + +use async_trait::async_trait; +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::common::Result as DFResult; +use datafusion::datasource::listing::ListingTable; +use datafusion::datasource::{provider_as_source, TableProvider}; +use datafusion::error::DataFusionError; +use datafusion::logical_expr::{LogicalPlan, LogicalPlanBuilder, TableProviderFilterPushDown, TableSource}; +use datafusion::prelude::Expr; +use datafusion::sql::TableReference; +use tracing::debug; + +pub const TEMP_LOCATION_TABLE_NAME: &str = "external_location_table"; + +pub struct TableSourceAdapter { + database_name: String, + table_name: String, + table_handle: TableHandle, + + plan: LogicalPlan, +} + +impl TableSourceAdapter { + pub fn try_new( + table_ref: impl Into, + table_name: impl Into, + table_handle: impl Into, + ) -> Result { + let table_name: String = table_name.into(); + + let table_handle = table_handle.into(); + let plan = match &table_handle { + // TableScan + TableHandle::External(t) => { + let table_source = provider_as_source(t.clone()); + LogicalPlanBuilder::scan(table_ref, table_source, None)?.build()? + } + // TableScan + TableHandle::TableProvider(t) => { + let table_source = provider_as_source(t.clone()); + if let Some(plan) = table_source.get_logical_plan() { + LogicalPlanBuilder::from(plan.into_owned()).build()? + } else { + LogicalPlanBuilder::scan(table_ref, table_source, None)?.build()? 
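+                    // providers without an inlined logical plan fall back to a plain TableScan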
+ } + } + }; + + debug!("Table source logical plan node of {}:\n{}", table_name, plan.display_indent_schema()); + + Ok(Self { + database_name: "default_db".to_string(), + table_name, + table_handle, + plan, + }) + } + + pub fn database_name(&self) -> &str { + &self.database_name + } + + pub fn table_name(&self) -> &str { + &self.table_name + } + + pub fn table_handle(&self) -> &TableHandle { + &self.table_handle + } +} + +#[async_trait] +impl TableSource for TableSourceAdapter { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.table_handle.schema() + } + + fn supports_filters_pushdown(&self, filter: &[&Expr]) -> DFResult> { + self.table_handle.supports_filters_pushdown(filter) + } + + /// Called by [`InlineTableScan`] + fn get_logical_plan(&self) -> Option> { + Some(Cow::Owned(self.plan.clone())) + } +} + +#[derive(Clone)] +pub enum TableHandle { + TableProvider(Arc), + External(Arc), +} + +impl TableHandle { + pub fn schema(&self) -> SchemaRef { + match self { + Self::External(t) => t.schema(), + Self::TableProvider(t) => t.schema(), + } + } + + pub fn supports_filters_pushdown(&self, filter: &[&Expr]) -> DFResult> { + match self { + Self::External(t) => t.supports_filters_pushdown(filter), + Self::TableProvider(t) => t.supports_filters_pushdown(filter), + } + } +} + +impl From> for TableHandle { + fn from(value: Arc) -> Self { + TableHandle::TableProvider(value) + } +} + +impl From> for TableHandle { + fn from(value: Arc) -> Self { + TableHandle::External(value) + } +} + +impl Display for TableHandle { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::External(e) => write!(f, "External({:?})", e.table_paths()), + Self::TableProvider(_) => write!(f, "TableProvider"), + } + } +} diff --git a/s3select/query/src/dispatcher/manager.rs b/s3select/query/src/dispatcher/manager.rs new file mode 100644 index 00000000..4abc4cec --- /dev/null +++ b/s3select/query/src/dispatcher/manager.rs @@ -0,0 +1,271 @@ +use std::{ + pin::Pin, + sync::Arc, + task::{Context, Poll}, +}; + +use api::{ + query::{ + ast::ExtStatement, + dispatcher::QueryDispatcher, + execution::{Output, QueryStateMachine}, + function::FuncMetaManagerRef, + logical_planner::{LogicalPlanner, Plan}, + parser::Parser, + session::{SessionCtx, SessionCtxFactory}, + Query, + }, + QueryError, QueryResult, +}; +use async_trait::async_trait; +use datafusion::{ + arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, + config::CsvOptions, + datasource::{ + file_format::{csv::CsvFormat, json::JsonFormat, parquet::ParquetFormat}, + listing::{ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl}, + }, + error::Result as DFResult, + execution::{RecordBatchStream, SendableRecordBatchStream}, +}; +use futures::{Stream, StreamExt}; +use s3s::dto::SelectObjectContentInput; + +use crate::{ + execution::factory::QueryExecutionFactoryRef, + metadata::{base_table::BaseTableProvider, ContextProviderExtension, MetadataProvider, TableHandleProviderRef}, + sql::logical::planner::DefaultLogicalPlanner, +}; + +#[derive(Clone)] +pub struct SimpleQueryDispatcher { + input: SelectObjectContentInput, + // client for default tenant + _default_table_provider: TableHandleProviderRef, + session_factory: Arc, + // parser + parser: Arc, + // get query execution factory + query_execution_factory: QueryExecutionFactoryRef, + func_manager: FuncMetaManagerRef, +} + +#[async_trait] +impl QueryDispatcher for SimpleQueryDispatcher { + async fn execute_query(&self, query: &Query) -> 
QueryResult { + let query_state_machine = { self.build_query_state_machine(query.clone()).await? }; + + let logical_plan = self.build_logical_plan(query_state_machine.clone()).await?; + let logical_plan = match logical_plan { + Some(plan) => plan, + None => return Ok(Output::Nil(())), + }; + let result = self.execute_logical_plan(logical_plan, query_state_machine).await?; + Ok(result) + } + + async fn build_logical_plan(&self, query_state_machine: Arc) -> QueryResult> { + let session = &query_state_machine.session; + let query = &query_state_machine.query; + + let scheme_provider = self.build_scheme_provider(session).await?; + + let logical_planner = DefaultLogicalPlanner::new(&scheme_provider); + + let statements = self.parser.parse(query.content())?; + + // not allow multi statement + if statements.len() > 1 { + return Err(QueryError::MultiStatement { + num: statements.len(), + sql: query_state_machine.query.content().to_string(), + }); + } + + let stmt = match statements.front() { + Some(stmt) => stmt.clone(), + None => return Ok(None), + }; + + let logical_plan = self + .statement_to_logical_plan(stmt, &logical_planner, query_state_machine) + .await?; + Ok(Some(logical_plan)) + } + + async fn execute_logical_plan(&self, logical_plan: Plan, query_state_machine: Arc) -> QueryResult { + self.execute_logical_plan(logical_plan, query_state_machine).await + } + + async fn build_query_state_machine(&self, query: Query) -> QueryResult> { + let session = self.session_factory.create_session_ctx(query.context()).await?; + + let query_state_machine = Arc::new(QueryStateMachine::begin(query, session)); + Ok(query_state_machine) + } +} + +impl SimpleQueryDispatcher { + async fn statement_to_logical_plan( + &self, + stmt: ExtStatement, + logical_planner: &DefaultLogicalPlanner<'_, S>, + query_state_machine: Arc, + ) -> QueryResult { + // begin analyze + query_state_machine.begin_analyze(); + let logical_plan = logical_planner + .create_logical_plan(stmt, &query_state_machine.session) + .await?; + query_state_machine.end_analyze(); + + Ok(logical_plan) + } + + async fn execute_logical_plan(&self, logical_plan: Plan, query_state_machine: Arc) -> QueryResult { + let execution = self + .query_execution_factory + .create_query_execution(logical_plan, query_state_machine.clone()) + .await?; + + match execution.start().await { + Ok(Output::StreamData(stream)) => Ok(Output::StreamData(Box::pin(TrackedRecordBatchStream { inner: stream }))), + Ok(nil @ Output::Nil(_)) => Ok(nil), + Err(err) => Err(err), + } + } + + async fn build_scheme_provider(&self, session: &SessionCtx) -> QueryResult { + let path = format!("s3://{}/{}", self.input.bucket, self.input.key); + let table_path = ListingTableUrl::parse(path)?; + let listing_options = if self.input.request.input_serialization.csv.is_some() { + let file_format = CsvFormat::default().with_options(CsvOptions::default().with_has_header(false)); + ListingOptions::new(Arc::new(file_format)).with_file_extension(".csv") + } else if self.input.request.input_serialization.parquet.is_some() { + let file_format = ParquetFormat::new(); + ListingOptions::new(Arc::new(file_format)).with_file_extension(".parquet") + } else if self.input.request.input_serialization.json.is_some() { + let file_format = JsonFormat::default(); + ListingOptions::new(Arc::new(file_format)).with_file_extension(".json") + } else { + return Err(QueryError::NotImplemented { + err: "not support this file type".to_string(), + }); + }; + + let resolve_schema = listing_options.infer_schema(session.inner(), 
&table_path).await?; + let config = ListingTableConfig::new(table_path) + .with_listing_options(listing_options) + .with_schema(resolve_schema); + let provider = Arc::new(ListingTable::try_new(config)?); + let current_session_table_provider = self.build_table_handle_provider()?; + let metadata_provider = + MetadataProvider::new(provider, current_session_table_provider, self.func_manager.clone(), session.clone()); + + Ok(metadata_provider) + } + + fn build_table_handle_provider(&self) -> QueryResult { + let current_session_table_provider: Arc = Arc::new(BaseTableProvider::default()); + + Ok(current_session_table_provider) + } +} + +pub struct TrackedRecordBatchStream { + inner: SendableRecordBatchStream, +} + +impl RecordBatchStream for TrackedRecordBatchStream { + fn schema(&self) -> SchemaRef { + self.inner.schema() + } +} + +impl Stream for TrackedRecordBatchStream { + type Item = DFResult; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.inner.poll_next_unpin(cx) + } +} + +#[derive(Default, Clone)] +pub struct SimpleQueryDispatcherBuilder { + input: Option, + default_table_provider: Option, + session_factory: Option>, + parser: Option>, + + query_execution_factory: Option, + + func_manager: Option, +} + +impl SimpleQueryDispatcherBuilder { + pub fn with_input(mut self, input: SelectObjectContentInput) -> Self { + self.input = Some(input); + self + } + pub fn with_default_table_provider(mut self, default_table_provider: TableHandleProviderRef) -> Self { + self.default_table_provider = Some(default_table_provider); + self + } + + pub fn with_session_factory(mut self, session_factory: Arc) -> Self { + self.session_factory = Some(session_factory); + self + } + + pub fn with_parser(mut self, parser: Arc) -> Self { + self.parser = Some(parser); + self + } + + pub fn with_query_execution_factory(mut self, query_execution_factory: QueryExecutionFactoryRef) -> Self { + self.query_execution_factory = Some(query_execution_factory); + self + } + + pub fn with_func_manager(mut self, func_manager: FuncMetaManagerRef) -> Self { + self.func_manager = Some(func_manager); + self + } + + pub fn build(self) -> QueryResult> { + let input = self.input.ok_or_else(|| QueryError::BuildQueryDispatcher { + err: "lost of input".to_string(), + })?; + + let session_factory = self.session_factory.ok_or_else(|| QueryError::BuildQueryDispatcher { + err: "lost of session_factory".to_string(), + })?; + + let parser = self.parser.ok_or_else(|| QueryError::BuildQueryDispatcher { + err: "lost of parser".to_string(), + })?; + + let query_execution_factory = self.query_execution_factory.ok_or_else(|| QueryError::BuildQueryDispatcher { + err: "lost of query_execution_factory".to_string(), + })?; + + let func_manager = self.func_manager.ok_or_else(|| QueryError::BuildQueryDispatcher { + err: "lost of func_manager".to_string(), + })?; + + let default_table_provider = self.default_table_provider.ok_or_else(|| QueryError::BuildQueryDispatcher { + err: "lost of default_table_provider".to_string(), + })?; + + let dispatcher = Arc::new(SimpleQueryDispatcher { + input, + _default_table_provider: default_table_provider, + session_factory, + parser, + query_execution_factory, + func_manager, + }); + + Ok(dispatcher) + } +} diff --git a/s3select/query/src/dispatcher/mod.rs b/s3select/query/src/dispatcher/mod.rs new file mode 100644 index 00000000..ff8de9eb --- /dev/null +++ b/s3select/query/src/dispatcher/mod.rs @@ -0,0 +1 @@ +pub mod manager; diff --git a/s3select/query/src/execution/factory.rs 
b/s3select/query/src/execution/factory.rs new file mode 100644 index 00000000..9960d68a --- /dev/null +++ b/s3select/query/src/execution/factory.rs @@ -0,0 +1,46 @@ +use std::sync::Arc; + +use api::{ + query::{ + execution::{QueryExecutionFactory, QueryExecutionRef, QueryStateMachineRef}, + logical_planner::Plan, + optimizer::Optimizer, + scheduler::SchedulerRef, + }, + QueryError, +}; +use async_trait::async_trait; + +use super::query::SqlQueryExecution; + +pub type QueryExecutionFactoryRef = Arc; + +pub struct SqlQueryExecutionFactory { + optimizer: Arc, + scheduler: SchedulerRef, +} + +impl SqlQueryExecutionFactory { + #[inline(always)] + pub fn new(optimizer: Arc, scheduler: SchedulerRef) -> Self { + Self { optimizer, scheduler } + } +} + +#[async_trait] +impl QueryExecutionFactory for SqlQueryExecutionFactory { + async fn create_query_execution( + &self, + plan: Plan, + state_machine: QueryStateMachineRef, + ) -> Result { + match plan { + Plan::Query(query_plan) => Ok(Arc::new(SqlQueryExecution::new( + state_machine, + query_plan, + self.optimizer.clone(), + self.scheduler.clone(), + ))), + } + } +} diff --git a/s3select/query/src/execution/mod.rs b/s3select/query/src/execution/mod.rs new file mode 100644 index 00000000..807faf3e --- /dev/null +++ b/s3select/query/src/execution/mod.rs @@ -0,0 +1,3 @@ +pub mod factory; +pub mod query; +pub mod scheduler; diff --git a/s3select/query/src/execution/query.rs b/s3select/query/src/execution/query.rs new file mode 100644 index 00000000..15d6ef83 --- /dev/null +++ b/s3select/query/src/execution/query.rs @@ -0,0 +1,92 @@ +use std::sync::Arc; + +use api::query::execution::{Output, QueryExecution, QueryStateMachineRef}; +use api::query::logical_planner::QueryPlan; +use api::query::optimizer::Optimizer; +use api::query::scheduler::SchedulerRef; +use api::{QueryError, QueryResult}; +use async_trait::async_trait; +use futures::stream::AbortHandle; +use parking_lot::Mutex; +use tracing::debug; + +pub struct SqlQueryExecution { + query_state_machine: QueryStateMachineRef, + plan: QueryPlan, + optimizer: Arc, + scheduler: SchedulerRef, + + abort_handle: Mutex>, +} + +impl SqlQueryExecution { + pub fn new( + query_state_machine: QueryStateMachineRef, + plan: QueryPlan, + optimizer: Arc, + scheduler: SchedulerRef, + ) -> Self { + Self { + query_state_machine, + plan, + optimizer, + scheduler, + abort_handle: Mutex::new(None), + } + } + + async fn start(&self) -> QueryResult { + // begin optimize + self.query_state_machine.begin_optimize(); + let physical_plan = self.optimizer.optimize(&self.plan, &self.query_state_machine.session).await?; + self.query_state_machine.end_optimize(); + + // begin schedule + self.query_state_machine.begin_schedule(); + let stream = self + .scheduler + .schedule(physical_plan.clone(), self.query_state_machine.session.inner().task_ctx()) + .await? + .stream(); + + debug!("Success build result stream."); + self.query_state_machine.end_schedule(); + + Ok(Output::StreamData(stream)) + } +} + +#[async_trait] +impl QueryExecution for SqlQueryExecution { + async fn start(&self) -> QueryResult { + let (task, abort_handle) = futures::future::abortable(self.start()); + + { + *self.abort_handle.lock() = Some(abort_handle); + } + + task.await.map_err(|_| QueryError::Cancel)? 
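+        // an abort triggered by cancel() surfaces here as QueryError::Cancel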
+    }
+
+    fn cancel(&self) -> QueryResult<()> {
+        debug!(
+            "cancel sql query execution: sql: {}, state: {:?}",
+            self.query_state_machine.query.content(),
+            self.query_state_machine.state()
+        );
+
+        // change state
+        self.query_state_machine.cancel();
+        // stop the running future task
+        if let Some(e) = self.abort_handle.lock().as_ref() {
+            e.abort()
+        };
+
+        debug!(
+            "canceled sql query execution: sql: {}, state: {:?}",
+            self.query_state_machine.query.content(),
+            self.query_state_machine.state()
+        );
+        Ok(())
+    }
+}
diff --git a/s3select/query/src/execution/scheduler/local.rs b/s3select/query/src/execution/scheduler/local.rs
new file mode 100644
index 00000000..e105d4b9
--- /dev/null
+++ b/s3select/query/src/execution/scheduler/local.rs
@@ -0,0 +1,22 @@
+use std::sync::Arc;
+
+use api::query::scheduler::{ExecutionResults, Scheduler};
+use async_trait::async_trait;
+use datafusion::error::DataFusionError;
+use datafusion::execution::context::TaskContext;
+use datafusion::physical_plan::{execute_stream, ExecutionPlan};
+
+pub struct LocalScheduler {}
+
+#[async_trait]
+impl Scheduler for LocalScheduler {
+    async fn schedule(
+        &self,
+        plan: Arc<dyn ExecutionPlan>,
+        context: Arc<TaskContext>,
+    ) -> Result<ExecutionResults, DataFusionError> {
+        let stream = execute_stream(plan, context)?;
+
+        Ok(ExecutionResults::new(stream))
+    }
+}
diff --git a/s3select/query/src/execution/scheduler/mod.rs b/s3select/query/src/execution/scheduler/mod.rs
new file mode 100644
index 00000000..27099624
--- /dev/null
+++ b/s3select/query/src/execution/scheduler/mod.rs
@@ -0,0 +1 @@
+pub mod local;
diff --git a/s3select/query/src/function/mod.rs b/s3select/query/src/function/mod.rs
new file mode 100644
index 00000000..e76614a0
--- /dev/null
+++ b/s3select/query/src/function/mod.rs
@@ -0,0 +1 @@
+pub mod simple_func_manager;
diff --git a/s3select/query/src/function/simple_func_manager.rs b/s3select/query/src/function/simple_func_manager.rs
new file mode 100644
index 00000000..129efacf
--- /dev/null
+++ b/s3select/query/src/function/simple_func_manager.rs
@@ -0,0 +1,63 @@
+use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+
+use api::query::function::FunctionMetadataManager;
+use api::{QueryError, QueryResult};
+use datafusion::logical_expr::{AggregateUDF, ScalarUDF, WindowUDF};
+
+pub type SimpleFunctionMetadataManagerRef = Arc<SimpleFunctionMetadataManager>;
+
+#[derive(Debug, Default)]
+pub struct SimpleFunctionMetadataManager {
+    /// Scalar functions that are registered with the context
+    pub scalar_functions: HashMap<String, Arc<ScalarUDF>>,
+    /// Aggregate functions registered in the context
+    pub aggregate_functions: HashMap<String, Arc<AggregateUDF>>,
+    /// Window functions registered in the context
+    pub window_functions: HashMap<String, Arc<WindowUDF>>,
+}
+
+impl FunctionMetadataManager for SimpleFunctionMetadataManager {
+    fn register_udf(&mut self, f: ScalarUDF) -> QueryResult<()> {
+        self.scalar_functions.insert(f.inner().name().to_uppercase(), Arc::new(f));
+        Ok(())
+    }
+
+    fn register_udaf(&mut self, f: AggregateUDF) -> QueryResult<()> {
+        self.aggregate_functions.insert(f.inner().name().to_uppercase(), Arc::new(f));
+        Ok(())
+    }
+
+    fn register_udwf(&mut self, f: WindowUDF) -> QueryResult<()> {
+        self.window_functions.insert(f.inner().name().to_uppercase(), Arc::new(f));
+        Ok(())
+    }
+
+    fn udf(&self, name: &str) -> QueryResult<Arc<ScalarUDF>> {
+        let result = self.scalar_functions.get(&name.to_uppercase());
+
+        result
+            .cloned()
+            .ok_or_else(|| QueryError::FunctionNotExists { name: name.to_string() })
+    }
+
+    fn udaf(&self, name: &str) -> QueryResult<Arc<AggregateUDF>> {
+        let result = self.aggregate_functions.get(&name.to_uppercase());
+
+        result
+            .cloned()
+            .ok_or_else(|| 
QueryError::FunctionNotExists { name: name.to_string() }) + } + + fn udwf(&self, name: &str) -> QueryResult> { + let result = self.window_functions.get(&name.to_uppercase()); + + result + .cloned() + .ok_or_else(|| QueryError::FunctionNotExists { name: name.to_string() }) + } + + fn udfs(&self) -> HashSet { + self.scalar_functions.keys().cloned().collect() + } +} diff --git a/s3select/query/src/instance.rs b/s3select/query/src/instance.rs new file mode 100644 index 00000000..03cc7b03 --- /dev/null +++ b/s3select/query/src/instance.rs @@ -0,0 +1,164 @@ +use std::sync::Arc; + +use api::{ + query::{ + dispatcher::QueryDispatcher, execution::QueryStateMachineRef, logical_planner::Plan, session::SessionCtxFactory, Query, + }, + server::dbms::{DatabaseManagerSystem, QueryHandle}, + QueryResult, +}; +use async_trait::async_trait; +use derive_builder::Builder; +use s3s::dto::SelectObjectContentInput; + +use crate::{ + dispatcher::manager::SimpleQueryDispatcherBuilder, + execution::{factory::SqlQueryExecutionFactory, scheduler::local::LocalScheduler}, + function::simple_func_manager::SimpleFunctionMetadataManager, + metadata::base_table::BaseTableProvider, + sql::{optimizer::CascadeOptimizerBuilder, parser::DefaultParser}, +}; + +#[derive(Builder)] +pub struct RustFSms { + // query dispatcher & query execution + query_dispatcher: Arc, +} + +#[async_trait] +impl DatabaseManagerSystem for RustFSms +where + D: QueryDispatcher, +{ + async fn execute(&self, query: &Query) -> QueryResult { + let result = self.query_dispatcher.execute_query(query).await?; + + Ok(QueryHandle::new(query.clone(), result)) + } + + async fn build_query_state_machine(&self, query: Query) -> QueryResult { + let query_state_machine = self.query_dispatcher.build_query_state_machine(query).await?; + + Ok(query_state_machine) + } + + async fn build_logical_plan(&self, query_state_machine: QueryStateMachineRef) -> QueryResult> { + let logical_plan = self.query_dispatcher.build_logical_plan(query_state_machine).await?; + + Ok(logical_plan) + } + + async fn execute_logical_plan( + &self, + logical_plan: Plan, + query_state_machine: QueryStateMachineRef, + ) -> QueryResult { + let query = query_state_machine.query.clone(); + let result = self + .query_dispatcher + .execute_logical_plan(logical_plan, query_state_machine) + .await?; + + Ok(QueryHandle::new(query.clone(), result)) + } +} + +pub async fn make_rustfsms(input: SelectObjectContentInput, is_test: bool) -> QueryResult { + // init Function Manager, we can define some UDF if need + let func_manager = SimpleFunctionMetadataManager::default(); + // TODO session config need load global system config + let session_factory = Arc::new(SessionCtxFactory { is_test }); + let parser = Arc::new(DefaultParser::default()); + let optimizer = Arc::new(CascadeOptimizerBuilder::default().build()); + // TODO wrap, and num_threads configurable + let scheduler = Arc::new(LocalScheduler {}); + + let query_execution_factory = Arc::new(SqlQueryExecutionFactory::new(optimizer, scheduler)); + + let default_table_provider = Arc::new(BaseTableProvider::default()); + + let query_dispatcher = SimpleQueryDispatcherBuilder::default() + .with_input(input) + .with_func_manager(Arc::new(func_manager)) + .with_default_table_provider(default_table_provider) + .with_session_factory(session_factory) + .with_parser(parser) + .with_query_execution_factory(query_execution_factory) + .build()?; + + let mut builder = RustFSmsBuilder::default(); + + let db_server = 
builder.query_dispatcher(query_dispatcher).build().expect("build db server"); + + Ok(db_server) +} + +#[cfg(test)] +mod tests { + use api::{ + query::{Context, Query}, + server::dbms::DatabaseManagerSystem, + }; + use datafusion::{arrow::util::pretty, assert_batches_eq}; + use s3s::dto::{ + CSVInput, CSVOutput, ExpressionType, InputSerialization, OutputSerialization, SelectObjectContentInput, + SelectObjectContentRequest, + }; + + use crate::instance::make_rustfsms; + + #[tokio::test] + #[ignore] + async fn test_simple_sql() { + let sql = "select * from S3Object"; + let input = SelectObjectContentInput { + bucket: "dandan".to_string(), + expected_bucket_owner: None, + key: "test.csv".to_string(), + sse_customer_algorithm: None, + sse_customer_key: None, + sse_customer_key_md5: None, + request: SelectObjectContentRequest { + expression: sql.to_string(), + expression_type: ExpressionType::from_static("SQL"), + input_serialization: InputSerialization { + csv: Some(CSVInput::default()), + ..Default::default() + }, + output_serialization: OutputSerialization { + csv: Some(CSVOutput::default()), + ..Default::default() + }, + request_progress: None, + scan_range: None, + }, + }; + let db = make_rustfsms(input.clone(), true).await.unwrap(); + let query = Query::new(Context { input }, sql.to_string()); + + let result = db.execute(&query).await.unwrap(); + + let results = result.result().chunk_result().await.unwrap().to_vec(); + + let expected = [ + "+----------------+----------+----------+------------+----------+", + "| column_1 | column_2 | column_3 | column_4 | column_5 |", + "+----------------+----------+----------+------------+----------+", + "| id | name | age | department | salary |", + "| 1 | Alice | 25 | HR | 5000 |", + "| 2 | Bob | 30 | IT | 6000 |", + "| 3 | Charlie | 35 | Finance | 7000 |", + "| 4 | Diana | 22 | Marketing | 4500 |", + "| 5 | Eve | 28 | IT | 5500 |", + "| 6 | Frank | 40 | Finance | 8000 |", + "| 7 | Grace | 26 | HR | 5200 |", + "| 8 | Henry | 32 | IT | 6200 |", + "| 9 | Ivy | 24 | Marketing | 4800 |", + "| 10 | Jack | 38 | Finance | 7500 |", + "+----------------+----------+----------+------------+----------+", + ]; + + assert_batches_eq!(expected, &results); + pretty::print_batches(&results).unwrap(); + } +} diff --git a/s3select/query/src/lib.rs b/s3select/query/src/lib.rs new file mode 100644 index 00000000..0a0c12eb --- /dev/null +++ b/s3select/query/src/lib.rs @@ -0,0 +1,7 @@ +pub mod data_source; +pub mod dispatcher; +pub mod execution; +pub mod function; +pub mod instance; +pub mod metadata; +pub mod sql; diff --git a/s3select/query/src/metadata/base_table.rs b/s3select/query/src/metadata/base_table.rs new file mode 100644 index 00000000..38dc841a --- /dev/null +++ b/s3select/query/src/metadata/base_table.rs @@ -0,0 +1,17 @@ +use std::sync::Arc; + +use datafusion::common::Result as DFResult; +use datafusion::datasource::listing::ListingTable; + +use crate::data_source::table_source::TableHandle; + +use super::TableHandleProvider; + +#[derive(Default)] +pub struct BaseTableProvider {} + +impl TableHandleProvider for BaseTableProvider { + fn build_table_handle(&self, provider: Arc) -> DFResult { + Ok(TableHandle::External(provider)) + } +} diff --git a/s3select/query/src/metadata/mod.rs b/s3select/query/src/metadata/mod.rs new file mode 100644 index 00000000..78c79e36 --- /dev/null +++ b/s3select/query/src/metadata/mod.rs @@ -0,0 +1,126 @@ +use std::sync::Arc; + +use api::query::{function::FuncMetaManagerRef, session::SessionCtx}; +use async_trait::async_trait; +use 
+use datafusion::arrow::datatypes::DataType;
+use datafusion::common::Result as DFResult;
+use datafusion::datasource::listing::ListingTable;
+use datafusion::logical_expr::var_provider::is_system_variables;
+use datafusion::logical_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF};
+use datafusion::variable::VarType;
+use datafusion::{
+    config::ConfigOptions,
+    sql::{planner::ContextProvider, TableReference},
+};
+
+use crate::data_source::table_source::{TableHandle, TableSourceAdapter};
+
+pub mod base_table;
+
+#[async_trait]
+pub trait ContextProviderExtension: ContextProvider {
+    fn get_table_source_(&self, name: TableReference) -> datafusion::common::Result<Arc<TableSourceAdapter>>;
+}
+
+pub type TableHandleProviderRef = Arc<dyn TableHandleProvider + Send + Sync>;
+
+pub trait TableHandleProvider {
+    fn build_table_handle(&self, provider: Arc<ListingTable>) -> DFResult<TableHandle>;
+}
+
+pub struct MetadataProvider {
+    provider: Arc<ListingTable>,
+    session: SessionCtx,
+    config_options: ConfigOptions,
+    func_manager: FuncMetaManagerRef,
+    current_session_table_provider: TableHandleProviderRef,
+}
+
+impl MetadataProvider {
+    #[allow(clippy::too_many_arguments)]
+    pub fn new(
+        provider: Arc<ListingTable>,
+        current_session_table_provider: TableHandleProviderRef,
+        func_manager: FuncMetaManagerRef,
+        session: SessionCtx,
+    ) -> Self {
+        Self {
+            provider,
+            current_session_table_provider,
+            config_options: session.inner().config_options().clone(),
+            session,
+            func_manager,
+        }
+    }
+
+    fn build_table_handle(&self) -> datafusion::common::Result<TableHandle> {
+        self.current_session_table_provider.build_table_handle(self.provider.clone())
+    }
+}
+
+impl ContextProviderExtension for MetadataProvider {
+    fn get_table_source_(&self, table_ref: TableReference) -> datafusion::common::Result<Arc<TableSourceAdapter>> {
+        let name = table_ref.clone().resolve("", "");
+        let table_name = &*name.table;
+
+        let table_handle = self.build_table_handle()?;
+
+        Ok(Arc::new(TableSourceAdapter::try_new(table_ref.clone(), table_name, table_handle)?))
+    }
+}
+
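+// `ContextProvider` is what DataFusion's SQL planner queries while planning;
+// function lookups consult the RustFS function manager first and fall back to
+// any UDFs registered on the underlying DataFusion session.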
+impl ContextProvider for MetadataProvider {
+    fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>> {
+        self.func_manager
+            .udf(name)
+            .ok()
+            .or(self.session.inner().scalar_functions().get(name).cloned())
+    }
+
+    fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
+        self.func_manager.udaf(name).ok()
+    }
+
+    fn get_variable_type(&self, variable_names: &[String]) -> Option<DataType> {
+        if variable_names.is_empty() {
+            return None;
+        }
+
+        let var_type = if is_system_variables(variable_names) {
+            VarType::System
+        } else {
+            VarType::UserDefined
+        };
+
+        self.session
+            .inner()
+            .execution_props()
+            .get_var_provider(var_type)
+            .and_then(|p| p.get_type(variable_names))
+    }
+
+    fn options(&self) -> &ConfigOptions {
+        // TODO refactor
+        &self.config_options
+    }
+
+    fn get_window_meta(&self, name: &str) -> Option<Arc<WindowUDF>> {
+        self.func_manager.udwf(name).ok()
+    }
+
+    fn get_table_source(&self, name: TableReference) -> DFResult<Arc<dyn TableSource>> {
+        Ok(self.get_table_source_(name)?)
+    }
+
+    fn udf_names(&self) -> Vec<String> {
+        todo!()
+    }
+
+    fn udaf_names(&self) -> Vec<String> {
+        todo!()
+    }
+
+    fn udwf_names(&self) -> Vec<String> {
+        todo!()
+    }
+}
diff --git a/s3select/query/src/sql/analyzer.rs b/s3select/query/src/sql/analyzer.rs
new file mode 100644
index 00000000..6507c842
--- /dev/null
+++ b/s3select/query/src/sql/analyzer.rs
@@ -0,0 +1,33 @@
+use api::query::analyzer::Analyzer;
+use api::query::session::SessionCtx;
+use api::QueryResult;
+use datafusion::logical_expr::LogicalPlan;
+use datafusion::optimizer::analyzer::Analyzer as DFAnalyzer;
+
+pub struct DefaultAnalyzer {
+    inner: DFAnalyzer,
+}
+
+impl DefaultAnalyzer {
+    pub fn new() -> Self {
+        let analyzer = DFAnalyzer::default();
+        // analyzer rules can be added here
+
+        Self { inner: analyzer }
+    }
+}
+
+impl Default for DefaultAnalyzer {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Analyzer for DefaultAnalyzer {
+    fn analyze(&self, plan: &LogicalPlan, session: &SessionCtx) -> QueryResult<LogicalPlan> {
+        let plan = self
+            .inner
+            .execute_and_check(plan.to_owned(), session.inner().config_options(), |_, _| {})?;
+        Ok(plan)
+    }
+}
diff --git a/s3select/query/src/sql/dialect.rs b/s3select/query/src/sql/dialect.rs
new file mode 100644
index 00000000..33297093
--- /dev/null
+++ b/s3select/query/src/sql/dialect.rs
@@ -0,0 +1,18 @@
+use datafusion::sql::sqlparser::dialect::Dialect;
+
+#[derive(Debug, Default)]
+pub struct RustFsDialect;
+
+impl Dialect for RustFsDialect {
+    fn is_identifier_start(&self, ch: char) -> bool {
+        ch.is_alphabetic() || ch == '_' || ch == '#' || ch == '@'
+    }
+
+    fn is_identifier_part(&self, ch: char) -> bool {
+        ch.is_alphabetic() || ch.is_ascii_digit() || ch == '@' || ch == '$' || ch == '#' || ch == '_'
+    }
+
+    fn supports_group_by_expr(&self) -> bool {
+        true
+    }
+}
diff --git a/s3select/query/src/sql/logical/mod.rs b/s3select/query/src/sql/logical/mod.rs
new file mode 100644
index 00000000..1ecfae43
--- /dev/null
+++ b/s3select/query/src/sql/logical/mod.rs
@@ -0,0 +1,2 @@
+pub mod optimizer;
+pub mod planner;
diff --git a/s3select/query/src/sql/logical/optimizer.rs b/s3select/query/src/sql/logical/optimizer.rs
new file mode 100644
index 00000000..e97e2967
--- /dev/null
+++ b/s3select/query/src/sql/logical/optimizer.rs
@@ -0,0 +1,111 @@
+use std::sync::Arc;
+
+use api::{
+    query::{analyzer::AnalyzerRef, logical_planner::QueryPlan, session::SessionCtx},
+    QueryResult,
+};
+use datafusion::{
+    execution::SessionStateBuilder,
+    logical_expr::LogicalPlan,
+    optimizer::{
+        common_subexpr_eliminate::CommonSubexprEliminate, decorrelate_predicate_subquery::DecorrelatePredicateSubquery,
+        eliminate_cross_join::EliminateCrossJoin, eliminate_duplicated_expr::EliminateDuplicatedExpr,
+        eliminate_filter::EliminateFilter, eliminate_join::EliminateJoin, eliminate_limit::EliminateLimit,
+        eliminate_outer_join::EliminateOuterJoin, extract_equijoin_predicate::ExtractEquijoinPredicate,
+        filter_null_join_keys::FilterNullJoinKeys, propagate_empty_relation::PropagateEmptyRelation,
+        push_down_filter::PushDownFilter, push_down_limit::PushDownLimit,
+        replace_distinct_aggregate::ReplaceDistinctWithAggregate, scalar_subquery_to_join::ScalarSubqueryToJoin,
+        simplify_expressions::SimplifyExpressions, single_distinct_to_groupby::SingleDistinctToGroupBy,
+        unwrap_cast_in_comparison::UnwrapCastInComparison, OptimizerRule,
+    },
+};
+use tracing::debug;
+
+use crate::sql::analyzer::DefaultAnalyzer;
+
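+// The logical optimizer runs the analyzer pass first, then applies the
+// DataFusion optimizer rules configured below.
+//
+// A minimal sketch of injecting a custom rule, assuming a hypothetical
+// `MyRule` type that implements DataFusion's `OptimizerRule`:
+//
+// ```ignore
+// let mut optimizer = DefaultLogicalOptimizer::default();
+// optimizer.inject_optimizer_rule(Arc::new(MyRule::default()));
+// ```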
+pub trait LogicalOptimizer: Send + Sync {
+    fn optimize(&self, plan: &QueryPlan, session: &SessionCtx) -> QueryResult<LogicalPlan>;
+
+    fn inject_optimizer_rule(&mut self, optimizer_rule: Arc<dyn OptimizerRule + Send + Sync>);
+}
+
+pub struct DefaultLogicalOptimizer {
+    // fits the DataFusion analyzer API
+    // TODO: refactor
+    analyzer: AnalyzerRef,
+    rules: Vec<Arc<dyn OptimizerRule + Send + Sync>>,
+}
+
+impl DefaultLogicalOptimizer {
+    #[allow(dead_code)]
+    fn with_optimizer_rules(mut self, rules: Vec<Arc<dyn OptimizerRule + Send + Sync>>) -> Self {
+        self.rules = rules;
+        self
+    }
+}
+
+impl Default for DefaultLogicalOptimizer {
+    fn default() -> Self {
+        let analyzer = Arc::new(DefaultAnalyzer::default());
+
+        // additional optimizer rules
+        let rules: Vec<Arc<dyn OptimizerRule + Send + Sync>> = vec![
+            // df default rules start
+            Arc::new(SimplifyExpressions::new()),
+            Arc::new(UnwrapCastInComparison::new()),
+            Arc::new(ReplaceDistinctWithAggregate::new()),
+            Arc::new(EliminateJoin::new()),
+            Arc::new(DecorrelatePredicateSubquery::new()),
+            Arc::new(ScalarSubqueryToJoin::new()),
+            Arc::new(ExtractEquijoinPredicate::new()),
+            // simplify expressions does not simplify expressions in subqueries, so we
+            // run it again after running the optimizations that potentially converted
+            // subqueries to joins
+            Arc::new(SimplifyExpressions::new()),
+            Arc::new(EliminateDuplicatedExpr::new()),
+            Arc::new(EliminateFilter::new()),
+            Arc::new(EliminateCrossJoin::new()),
+            Arc::new(CommonSubexprEliminate::new()),
+            Arc::new(EliminateLimit::new()),
+            Arc::new(PropagateEmptyRelation::new()),
+            Arc::new(FilterNullJoinKeys::default()),
+            Arc::new(EliminateOuterJoin::new()),
+            // Filters can't be pushed down past Limits, so we should run PushDownFilter after PushDownLimit
+            Arc::new(PushDownLimit::new()),
+            Arc::new(PushDownFilter::new()),
+            Arc::new(SingleDistinctToGroupBy::new()),
+            // The previous optimizations added expressions and projections,
+            // that might benefit from the following rules
+            Arc::new(SimplifyExpressions::new()),
+            Arc::new(UnwrapCastInComparison::new()),
+            Arc::new(CommonSubexprEliminate::new()),
+            // PushDownProjection can push Projections down through Limits, so run PushDownLimit again.
+            Arc::new(PushDownLimit::new()),
+            // df default rules end
+            // custom rules can be added here
+        ];
+
+        Self { analyzer, rules }
+    }
+}
+
+impl LogicalOptimizer for DefaultLogicalOptimizer {
+    fn optimize(&self, plan: &QueryPlan, session: &SessionCtx) -> QueryResult<LogicalPlan> {
+        let analyzed_plan = { self.analyzer.analyze(&plan.df_plan, session)? };
+
+        debug!("Analyzed logical plan:\n{}\n", analyzed_plan.display_indent_schema(),);
+
+        let optimized_plan = {
+            SessionStateBuilder::new_from_existing(session.inner().clone())
+                .with_optimizer_rules(self.rules.clone())
+                .build()
+                .optimize(&analyzed_plan)?
+        };
+
+        Ok(optimized_plan)
+    }
+
+    fn inject_optimizer_rule(&mut self, optimizer_rule: Arc<dyn OptimizerRule + Send + Sync>) {
+        self.rules.push(optimizer_rule);
+    }
+}
diff --git a/s3select/query/src/sql/logical/planner.rs b/s3select/query/src/sql/logical/planner.rs
new file mode 100644
index 00000000..bcdc59c0
--- /dev/null
+++ b/s3select/query/src/sql/logical/planner.rs
@@ -0,0 +1,3 @@
+use crate::sql::planner::SqlPlanner;
+
+pub type DefaultLogicalPlanner<'a, S> = SqlPlanner<'a, S>;
diff --git a/s3select/query/src/sql/mod.rs b/s3select/query/src/sql/mod.rs
new file mode 100644
index 00000000..151fc83d
--- /dev/null
+++ b/s3select/query/src/sql/mod.rs
@@ -0,0 +1,7 @@
+pub mod analyzer;
+pub mod dialect;
+pub mod logical;
+pub mod optimizer;
+pub mod parser;
+pub mod physical;
+pub mod planner;
diff --git a/s3select/query/src/sql/optimizer.rs b/s3select/query/src/sql/optimizer.rs
new file mode 100644
index 00000000..b424b073
--- /dev/null
+++ b/s3select/query/src/sql/optimizer.rs
@@ -0,0 +1,82 @@
+use std::sync::Arc;
+
+use api::{
+    query::{logical_planner::QueryPlan, optimizer::Optimizer, physical_planner::PhysicalPlanner, session::SessionCtx},
+    QueryResult,
+};
+use async_trait::async_trait;
+use datafusion::physical_plan::{displayable, ExecutionPlan};
+use tracing::debug;
+
+use super::{
+    logical::optimizer::{DefaultLogicalOptimizer, LogicalOptimizer},
+    physical::{optimizer::PhysicalOptimizer, planner::DefaultPhysicalPlanner},
+};
+
+pub struct CascadeOptimizer {
+    logical_optimizer: Arc<dyn LogicalOptimizer>,
+    physical_planner: Arc<dyn PhysicalPlanner>,
+    physical_optimizer: Arc<dyn PhysicalOptimizer>,
+}
+
+#[async_trait]
+impl Optimizer for CascadeOptimizer {
+    async fn optimize(&self, plan: &QueryPlan, session: &SessionCtx) -> QueryResult<Arc<dyn ExecutionPlan>> {
+        debug!("Original logical plan:\n{}\n", plan.df_plan.display_indent_schema(),);
+
+        let optimized_logical_plan = self.logical_optimizer.optimize(plan, session)?;
+
+        debug!("Final logical plan:\n{}\n", optimized_logical_plan.display_indent_schema(),);
+
+        let physical_plan = {
+            self.physical_planner
+                .create_physical_plan(&optimized_logical_plan, session)
+                .await?
+        };
+
+        debug!("Original physical plan:\n{}\n", displayable(physical_plan.as_ref()).indent(false));
+
+        let optimized_physical_plan = { self.physical_optimizer.optimize(physical_plan, session)?
+        };
+
+        Ok(optimized_physical_plan)
+    }
+}
+
+#[derive(Default)]
+pub struct CascadeOptimizerBuilder {
+    logical_optimizer: Option<Arc<dyn LogicalOptimizer>>,
+    physical_planner: Option<Arc<dyn PhysicalPlanner>>,
+    physical_optimizer: Option<Arc<dyn PhysicalOptimizer>>,
+}
+
+impl CascadeOptimizerBuilder {
+    pub fn with_logical_optimizer(mut self, logical_optimizer: Arc<dyn LogicalOptimizer>) -> Self {
+        self.logical_optimizer = Some(logical_optimizer);
+        self
+    }
+
+    pub fn with_physical_planner(mut self, physical_planner: Arc<dyn PhysicalPlanner>) -> Self {
+        self.physical_planner = Some(physical_planner);
+        self
+    }
+
+    pub fn with_physical_optimizer(mut self, physical_optimizer: Arc<dyn PhysicalOptimizer>) -> Self {
+        self.physical_optimizer = Some(physical_optimizer);
+        self
+    }
+
+    pub fn build(self) -> CascadeOptimizer {
+        let default_logical_optimizer = Arc::new(DefaultLogicalOptimizer::default());
+        let default_physical_planner = Arc::new(DefaultPhysicalPlanner::default());
+
+        let logical_optimizer = self.logical_optimizer.unwrap_or(default_logical_optimizer);
+        let physical_planner = self.physical_planner.unwrap_or_else(|| default_physical_planner.clone());
+        let physical_optimizer = self.physical_optimizer.unwrap_or(default_physical_planner);
+
+        CascadeOptimizer {
+            logical_optimizer,
+            physical_planner,
+            physical_optimizer,
+        }
+    }
+}
diff --git a/s3select/query/src/sql/parser.rs b/s3select/query/src/sql/parser.rs
new file mode 100644
index 00000000..ebd2b5d4
--- /dev/null
+++ b/s3select/query/src/sql/parser.rs
@@ -0,0 +1,92 @@
+use std::{collections::VecDeque, fmt::Display};
+
+use api::{
+    query::{ast::ExtStatement, parser::Parser as RustFsParser},
+    ParserSnafu,
+};
+use datafusion::sql::sqlparser::{
+    dialect::Dialect,
+    parser::{Parser, ParserError},
+    tokenizer::{Token, Tokenizer},
+};
+use snafu::ResultExt;
+
+use super::dialect::RustFsDialect;
+
+pub type Result<T, E = ParserError> = std::result::Result<T, E>;
+
+// Use `Parser::expected` instead, if possible
+macro_rules! parser_err {
+    ($MSG:expr) => {
+        Err(ParserError::ParserError($MSG.to_string()))
+    };
+}
+
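+/// Entry point used by the dispatcher to turn raw SQL into `ExtStatement`s.
+///
+/// A minimal usage sketch, assuming a single SELECT statement; error
+/// handling is elided:
+///
+/// ```ignore
+/// let stmts = DefaultParser::default().parse("select * from S3Object")?;
+/// assert_eq!(stmts.len(), 1);
+/// ```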
parser_err { + ($MSG:expr) => { + Err(ParserError::ParserError($MSG.to_string())) + }; +} + +#[derive(Default)] +pub struct DefaultParser {} + +impl RustFsParser for DefaultParser { + fn parse(&self, sql: &str) -> api::QueryResult> { + ExtParser::parse_sql(sql).context(ParserSnafu) + } +} + +/// SQL Parser +pub struct ExtParser<'a> { + parser: Parser<'a>, +} + +impl<'a> ExtParser<'a> { + /// Parse the specified tokens with dialect + fn new_with_dialect(sql: &str, dialect: &'a dyn Dialect) -> Result { + let mut tokenizer = Tokenizer::new(dialect, sql); + let tokens = tokenizer.tokenize()?; + Ok(ExtParser { + parser: Parser::new(dialect).with_tokens(tokens), + }) + } + + /// Parse a SQL statement and produce a set of statements + pub fn parse_sql(sql: &str) -> Result> { + let dialect = &RustFsDialect {}; + ExtParser::parse_sql_with_dialect(sql, dialect) + } + + /// Parse a SQL statement and produce a set of statements + pub fn parse_sql_with_dialect(sql: &str, dialect: &dyn Dialect) -> Result> { + let mut parser = ExtParser::new_with_dialect(sql, dialect)?; + let mut stmts = VecDeque::new(); + let mut expecting_statement_delimiter = false; + loop { + // ignore empty statements (between successive statement delimiters) + while parser.parser.consume_token(&Token::SemiColon) { + expecting_statement_delimiter = false; + } + + if parser.parser.peek_token() == Token::EOF { + break; + } + if expecting_statement_delimiter { + return parser.expected("end of statement", parser.parser.peek_token()); + } + + let statement = parser.parse_statement()?; + stmts.push_back(statement); + expecting_statement_delimiter = true; + } + + // debug!("Parser sql: {}, stmts: {:#?}", sql, stmts); + + Ok(stmts) + } + + /// Parse a new expression + fn parse_statement(&mut self) -> Result { + Ok(ExtStatement::SqlStatement(Box::new(self.parser.parse_statement()?))) + } + + // Report unexpected token + fn expected(&self, expected: &str, found: impl Display) -> Result { + parser_err!(format!("Expected {}, found: {}", expected, found)) + } +} diff --git a/s3select/query/src/sql/physical/mod.rs b/s3select/query/src/sql/physical/mod.rs new file mode 100644 index 00000000..1ecfae43 --- /dev/null +++ b/s3select/query/src/sql/physical/mod.rs @@ -0,0 +1,2 @@ +pub mod optimizer; +pub mod planner; diff --git a/s3select/query/src/sql/physical/optimizer.rs b/s3select/query/src/sql/physical/optimizer.rs new file mode 100644 index 00000000..12f16e3d --- /dev/null +++ b/s3select/query/src/sql/physical/optimizer.rs @@ -0,0 +1,12 @@ +use std::sync::Arc; + +use api::query::session::SessionCtx; +use api::QueryResult; +use datafusion::physical_optimizer::PhysicalOptimizerRule; +use datafusion::physical_plan::ExecutionPlan; + +pub trait PhysicalOptimizer { + fn optimize(&self, plan: Arc, session: &SessionCtx) -> QueryResult>; + + fn inject_optimizer_rule(&mut self, optimizer_rule: Arc); +} diff --git a/s3select/query/src/sql/physical/planner.rs b/s3select/query/src/sql/physical/planner.rs new file mode 100644 index 00000000..254c198d --- /dev/null +++ b/s3select/query/src/sql/physical/planner.rs @@ -0,0 +1,104 @@ +use std::sync::Arc; + +use api::query::physical_planner::PhysicalPlanner; +use api::query::session::SessionCtx; +use api::QueryResult; +use async_trait::async_trait; +use datafusion::execution::SessionStateBuilder; +use datafusion::logical_expr::LogicalPlan; +use datafusion::physical_optimizer::aggregate_statistics::AggregateStatistics; +use datafusion::physical_optimizer::coalesce_batches::CoalesceBatches; +use 
+use datafusion::physical_optimizer::join_selection::JoinSelection;
+use datafusion::physical_optimizer::PhysicalOptimizerRule;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion::physical_planner::{
+    DefaultPhysicalPlanner as DFDefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner as DFPhysicalPlanner,
+};
+
+use super::optimizer::PhysicalOptimizer;
+
+pub struct DefaultPhysicalPlanner {
+    ext_physical_transform_rules: Vec<Arc<dyn ExtensionPlanner + Send + Sync>>,
+    /// Responsible for optimizing a physical execution plan
+    ext_physical_optimizer_rules: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>>,
+}
+
+impl DefaultPhysicalPlanner {
+    #[allow(dead_code)]
+    fn with_physical_transform_rules(mut self, rules: Vec<Arc<dyn ExtensionPlanner + Send + Sync>>) -> Self {
+        self.ext_physical_transform_rules = rules;
+        self
+    }
+}
+
+impl DefaultPhysicalPlanner {
+    #[allow(dead_code)]
+    fn with_optimizer_rules(mut self, rules: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>>) -> Self {
+        self.ext_physical_optimizer_rules = rules;
+        self
+    }
+}
+
+impl Default for DefaultPhysicalPlanner {
+    fn default() -> Self {
+        let ext_physical_transform_rules: Vec<Arc<dyn ExtensionPlanner + Send + Sync>> = vec![
+            // rules can be added here
+        ];
+
+        // We need to take care of the rule ordering. They may influence each other.
+        let ext_physical_optimizer_rules: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>> = vec![
+            Arc::new(AggregateStatistics::new()),
+            // Statistics-based join selection will change the Auto mode to a real join implementation,
+            // like collect left, or hash join, or future sort merge join, which will influence the
+            // EnforceDistribution and EnforceSorting rules as they decide whether to add additional
+            // repartitioning and local sorting steps to meet distribution and ordering requirements.
+            // Therefore, it should run before EnforceDistribution and EnforceSorting.
+            Arc::new(JoinSelection::new()),
+            // The CoalesceBatches rule will not influence the distribution and ordering of the
+            // whole plan tree. Therefore, to avoid influencing other rules, it should run last.
+            Arc::new(CoalesceBatches::new()),
+        ];
+
+        Self {
+            ext_physical_transform_rules,
+            ext_physical_optimizer_rules,
+        }
+    }
+}
+
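+// A minimal sketch of extending the planner, assuming a hypothetical
+// `MyPhysRule` type that implements DataFusion's `PhysicalOptimizerRule`:
+//
+// ```ignore
+// let mut planner = DefaultPhysicalPlanner::default();
+// planner.inject_optimizer_rule(Arc::new(MyPhysRule::default()));
+// ```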
+#[async_trait]
+impl PhysicalPlanner for DefaultPhysicalPlanner {
+    async fn create_physical_plan(
+        &self,
+        logical_plan: &LogicalPlan,
+        session: &SessionCtx,
+    ) -> QueryResult<Arc<dyn ExecutionPlan>> {
+        // Inject the extended physical optimizer rules into the DataFusion session state
+        let new_state = SessionStateBuilder::new_from_existing(session.inner().clone())
+            .with_physical_optimizer_rules(self.ext_physical_optimizer_rules.clone())
+            .build();
+
+        // Build the DataFusion physical planner from the extended physical transform rules
+        let planner = DFDefaultPhysicalPlanner::with_extension_planners(self.ext_physical_transform_rules.clone());
+
+        // Run DataFusion's physical planning and optimization
+        planner
+            .create_physical_plan(logical_plan, &new_state)
+            .await
+            .map_err(|e| e.into())
+    }
+
+    fn inject_physical_transform_rule(&mut self, rule: Arc<dyn ExtensionPlanner + Send + Sync>) {
+        self.ext_physical_transform_rules.push(rule)
+    }
+}
+
+impl PhysicalOptimizer for DefaultPhysicalPlanner {
+    fn optimize(&self, plan: Arc<dyn ExecutionPlan>, _session: &SessionCtx) -> QueryResult<Arc<dyn ExecutionPlan>> {
+        Ok(plan)
+    }
+
+    fn inject_optimizer_rule(&mut self, optimizer_rule: Arc<dyn PhysicalOptimizerRule + Send + Sync>) {
+        self.ext_physical_optimizer_rules.push(optimizer_rule);
+    }
+}
diff --git a/s3select/query/src/sql/planner.rs b/s3select/query/src/sql/planner.rs
new file mode 100644
index 00000000..a6c9f8c1
--- /dev/null
+++ b/s3select/query/src/sql/planner.rs
@@ -0,0 +1,60 @@
+use api::{
+    query::{
+        ast::ExtStatement,
+        logical_planner::{LogicalPlanner, Plan, QueryPlan},
+        session::SessionCtx,
+    },
+    QueryError, QueryResult,
+};
+use async_recursion::async_recursion;
+use async_trait::async_trait;
+use datafusion::sql::{planner::SqlToRel, sqlparser::ast::Statement};
+
+use crate::metadata::ContextProviderExtension;
+
+pub struct SqlPlanner<'a, S: ContextProviderExtension> {
+    _schema_provider: &'a S,
+    df_planner: SqlToRel<'a, S>,
+}
+
+#[async_trait]
+impl<S: ContextProviderExtension + Send + Sync> LogicalPlanner for SqlPlanner<'_, S> {
+    async fn create_logical_plan(&self, statement: ExtStatement, session: &SessionCtx) -> QueryResult<Plan> {
+        let plan = { self.statement_to_plan(statement, session).await? };
+
+        Ok(plan)
+    }
+}
+
+impl<'a, S: ContextProviderExtension + Send + Sync + 'a> SqlPlanner<'a, S> {
+    /// Create a new query planner
+    pub fn new(schema_provider: &'a S) -> Self {
+        SqlPlanner {
+            _schema_provider: schema_provider,
+            df_planner: SqlToRel::new(schema_provider),
+        }
+    }
+
+    /// Generate a logical plan from an extended SQL statement
+    #[async_recursion]
+    pub(crate) async fn statement_to_plan(&self, statement: ExtStatement, session: &SessionCtx) -> QueryResult<Plan> {
+        match statement {
+            ExtStatement::SqlStatement(stmt) => self.df_sql_to_plan(*stmt, session).await,
+        }
+    }
+
+    async fn df_sql_to_plan(&self, stmt: Statement, _session: &SessionCtx) -> QueryResult<Plan> {
+        match stmt {
+            Statement::Query(_) => {
+                let df_plan = self.df_planner.sql_statement_to_plan(stmt)?;
+                let plan = Plan::Query(QueryPlan {
+                    df_plan,
+                    is_tag_scan: false,
+                });
+
+                Ok(plan)
+            }
+            _ => Err(QueryError::NotImplemented { err: stmt.to_string() }),
+        }
+    }
+}
diff --git a/scripts/static.sh b/scripts/static.sh
index 9fd7469d..814762ec 100755
--- a/scripts/static.sh
+++ b/scripts/static.sh
@@ -1 +1 @@
-curl -L "https://dl.rustfs.com/console/rustfs-console-latest.zip" -o tempfile.zip && unzip -o tempfile.zip -d ./rustfs/static && rm tempfile.zip
\ No newline at end of file
+curl -L "https://dl.rustfs.com/artifacts/console/rustfs-console-latest.zip" -o tempfile.zip && unzip -o tempfile.zip -d ./rustfs/static && rm tempfile.zip