mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-04 08:04:07 +00:00
Compare commits
20 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f1eb1cb1eb | ||
|
|
de41f2b7bf | ||
|
|
a74a0d69f3 | ||
|
|
5f7e166cbf | ||
|
|
d72f5f7ba2 | ||
|
|
b77e6c18e1 | ||
|
|
2ddd3f2356 | ||
|
|
4d3d455d3c | ||
|
|
c9b1c06467 | ||
|
|
b6ae75afb4 | ||
|
|
b6dff20e2f | ||
|
|
2db78c75e4 | ||
|
|
02463ab27b | ||
|
|
adc76347d7 | ||
|
|
3a2bdcda0b | ||
|
|
66bb7985c3 | ||
|
|
2f61c0f5bf | ||
|
|
3ffd0fae47 | ||
|
|
a4a0aa5ea2 | ||
|
|
92cd103f62 |
52
.github/workflows/build-amd.yml
vendored
Normal file
52
.github/workflows/build-amd.yml
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
name: CI (AMD)
|
||||
|
||||
on:
|
||||
workflow_dispatch: # allows manual triggering
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths: [
|
||||
'.github/workflows/build-amd.yml',
|
||||
'**/CMakeLists.txt',
|
||||
'**/*.cmake', # any CMake module; the previous '**/.cmake' only matched files literally named ".cmake"
|
||||
'**/*.h',
|
||||
'**/*.hpp',
|
||||
'**/*.c',
|
||||
'**/*.cpp',
|
||||
'**/*.cu',
|
||||
'**/*.cuh',
|
||||
'**/*.comp'
|
||||
]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
ggml-ci-x64-amd-vulkan:
|
||||
runs-on: [self-hosted, Linux, X64, AMD]
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Test
|
||||
id: ggml-ci
|
||||
run: |
|
||||
vulkaninfo --summary
|
||||
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
|
||||
|
||||
ggml-ci-x64-amd-rocm:
|
||||
runs-on: [self-hosted, Linux, X64, AMD]
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Test
|
||||
id: ggml-ci
|
||||
run: |
|
||||
amd-smi static
|
||||
GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
|
||||
44
.github/workflows/build-linux-cross.yml
vendored
44
.github/workflows/build-linux-cross.yml
vendored
@@ -253,3 +253,47 @@ jobs:
|
||||
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
|
||||
|
||||
cmake --build build --config Release -j $(nproc)
|
||||
|
||||
ubuntu-24-riscv64-cpu-spacemit-ime-cross:
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
env:
|
||||
SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2"
|
||||
SPACEMIT_IME_TOOLCHAIN_PATH: "spacemit-toolchain-linux-glibc-x86_64"
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Cache Toolchain
|
||||
uses: actions/cache@v4
|
||||
id: cache-spacemit-ime-cross-toolchain
|
||||
with:
|
||||
path: ./${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}
|
||||
key: ${{ runner.os }}-spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}
|
||||
|
||||
- name: Setup Toolchain
|
||||
if: steps.cache-spacemit-ime-cross-toolchain.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
wget --quiet --no-check-certificate https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}.tar.xz -O ${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}.tar.xz
|
||||
rm -rf ${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}
|
||||
mkdir -p ${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}
|
||||
tar xf ${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}.tar.xz -C ${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }} --strip-components=1
|
||||
rm -rf ${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}.tar.xz
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
export RISCV_ROOT_PATH=${PWD}/${{ env.SPACEMIT_IME_TOOLCHAIN_PATH }}
|
||||
cmake -B build -DLLAMA_CURL=OFF \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DGGML_OPENMP=OFF \
|
||||
-DLLAMA_BUILD_EXAMPLES=ON \
|
||||
-DLLAMA_BUILD_TOOLS=ON \
|
||||
-DLLAMA_BUILD_TESTS=OFF \
|
||||
-DGGML_CPU_RISCV64_SPACEMIT=ON \
|
||||
-DGGML_RVV=ON \
|
||||
-DGGML_RV_ZFH=ON \
|
||||
-DGGML_RV_ZICBOP=ON \
|
||||
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
|
||||
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake
|
||||
|
||||
cmake --build build --config Release -j $(nproc)
|
||||
|
||||
60
.github/workflows/build-riscv-native.yml
vendored
60
.github/workflows/build-riscv-native.yml
vendored
@@ -58,3 +58,63 @@ jobs:
|
||||
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
|
||||
|
||||
cmake --build build --config Release -j $(nproc)
|
||||
|
||||
# debian-13-riscv64-spacemit-ime-native: # Bianbu 2.2
|
||||
# runs-on: [self-hosted, RISCV64]
|
||||
|
||||
# steps:
|
||||
# - name: Install prerequisites
|
||||
# run: |
|
||||
# sudo apt-get update || true
|
||||
# sudo apt-get install -y libatomic1
|
||||
# - uses: actions/checkout@v4
|
||||
# - name: Setup Riscv
|
||||
# run: |
|
||||
# sudo apt-get update || true
|
||||
# sudo apt-get install -y --no-install-recommends \
|
||||
# build-essential \
|
||||
# gcc-14-riscv64-linux-gnu \
|
||||
# g++-14-riscv64-linux-gnu \
|
||||
# ccache \
|
||||
# cmake
|
||||
# sudo apt-get upgrade binutils -y
|
||||
|
||||
# - name: Setup ccache
|
||||
# run: |
|
||||
# mkdir -p $HOME/.ccache
|
||||
# ccache -M 5G -d $HOME/.ccache
|
||||
# export CCACHE_LOGFILE=/home/runneruser/ccache_debug/ccache.log
|
||||
# export CCACHE_DEBUGDIR="/home/runneruser/ccache_debug"
|
||||
# echo "$GITHUB_WORKSPACE"
|
||||
# echo "CCACHE_LOGFILE=$CCACHE_LOGFILE" >> $GITHUB_ENV
|
||||
# echo "CCACHE_DEBUGDIR=$CCACHE_DEBUGDIR" >> $GITHUB_ENV
|
||||
# echo "CCACHE_BASEDIR=$GITHUB_WORKSPACE" >> $GITHUB_ENV
|
||||
# echo "CCACHE_DIR=$HOME/.ccache" >> $GITHUB_ENV
|
||||
|
||||
# - name: Build
|
||||
# run: |
|
||||
# cmake -B build \
|
||||
# -DLLAMA_CURL=OFF \
|
||||
# -DCMAKE_BUILD_TYPE=Release \
|
||||
# -DGGML_OPENMP=OFF \
|
||||
# -DLLAMA_BUILD_EXAMPLES=ON \
|
||||
# -DLLAMA_BUILD_TOOLS=ON \
|
||||
# -DLLAMA_BUILD_TESTS=OFF \
|
||||
# -DCMAKE_SYSTEM_NAME=Linux \
|
||||
# -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
|
||||
# -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
|
||||
# -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
|
||||
# -DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
# -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||
# -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
|
||||
# -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
|
||||
# -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
|
||||
# -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
|
||||
# -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH \
|
||||
# -DGGML_RVV=ON \
|
||||
# -DGGML_RV_ZFH=ON \
|
||||
# -DGGML_RV_ZICBOP=ON \
|
||||
# -DGGML_CPU_RISCV64_SPACEMIT=ON \
|
||||
# -DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1
|
||||
|
||||
# cmake --build build --config Release -j $(nproc)
|
||||
|
||||
28
.github/workflows/build.yml
vendored
28
.github/workflows/build.yml
vendored
@@ -1461,34 +1461,6 @@ jobs:
|
||||
run: |
|
||||
bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
|
||||
|
||||
# ggml-ci-x64-amd-vulkan:
|
||||
# runs-on: [self-hosted, Linux, X64, AMD]
|
||||
#
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# id: checkout
|
||||
# uses: actions/checkout@v4
|
||||
#
|
||||
# - name: Test
|
||||
# id: ggml-ci
|
||||
# run: |
|
||||
# vulkaninfo --summary
|
||||
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
|
||||
#
|
||||
# ggml-ci-x64-amd-rocm:
|
||||
# runs-on: [self-hosted, Linux, X64, AMD]
|
||||
#
|
||||
# steps:
|
||||
# - name: Clone
|
||||
# id: checkout
|
||||
# uses: actions/checkout@v4
|
||||
#
|
||||
# - name: Test
|
||||
# id: ggml-ci
|
||||
# run: |
|
||||
# amd-smi static
|
||||
# GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
|
||||
|
||||
ggml-ci-mac-metal:
|
||||
runs-on: [self-hosted, macOS, ARM64]
|
||||
|
||||
|
||||
@@ -50,6 +50,7 @@
|
||||
/ggml/src/ggml-blas/ @slaren
|
||||
/ggml/src/ggml-common.h @ggerganov @slaren
|
||||
/ggml/src/ggml-cpu/ @ggerganov @slaren
|
||||
/ggml/src/ggml-cpu/spacemit/ @alex-spacemit
|
||||
/ggml/src/ggml-cuda/common.cuh @slaren
|
||||
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
|
||||
/ggml/src/ggml-cuda/ggml-cuda.cu @slaren
|
||||
@@ -59,6 +60,7 @@
|
||||
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
|
||||
/ggml/src/ggml-impl.h @ggerganov @slaren
|
||||
/ggml/src/ggml-metal/ @ggerganov
|
||||
/ggml/src/ggml-opencl/ @lhez @max-krasnyansky
|
||||
/ggml/src/ggml-opt.cpp @JohannesGaessler
|
||||
/ggml/src/ggml-quants.* @ggerganov
|
||||
/ggml/src/ggml-rpc/ @rgerganov
|
||||
|
||||
@@ -114,6 +114,7 @@ if [ ! -z ${GG_BUILD_NO_SVE} ]; then
|
||||
# arm 9 and newer enables sve by default, adjust these flags depending on the cpu used
|
||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm"
|
||||
fi
|
||||
|
||||
## helpers
|
||||
|
||||
# download a file if it does not exist or if it is outdated
|
||||
|
||||
29
cmake/riscv64-spacemit-linux-gnu-gcc.cmake
Normal file
29
cmake/riscv64-spacemit-linux-gnu-gcc.cmake
Normal file
@@ -0,0 +1,29 @@
|
||||
set(CMAKE_SYSTEM_NAME Linux)
|
||||
set(CMAKE_SYSTEM_PROCESSOR riscv64)
|
||||
set(CMAKE_SYSTEM_VERSION 1)
|
||||
|
||||
if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(riscv)")
|
||||
message(STATUS "HOST SYSTEM ${CMAKE_HOST_SYSTEM_PROCESSOR}")
|
||||
else()
|
||||
set(GNU_MACHINE riscv64-unknown-linux-gnu CACHE STRING "GNU compiler triple")
|
||||
if (DEFINED ENV{RISCV_ROOT_PATH})
|
||||
file(TO_CMAKE_PATH $ENV{RISCV_ROOT_PATH} RISCV_ROOT_PATH)
|
||||
else()
|
||||
message(FATAL_ERROR "RISCV_ROOT_PATH env must be defined")
|
||||
endif()
|
||||
|
||||
set(RISCV_ROOT_PATH ${RISCV_ROOT_PATH} CACHE STRING "root path to riscv toolchain")
|
||||
set(CMAKE_C_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-gcc)
|
||||
set(CMAKE_CXX_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-g++)
|
||||
set(CMAKE_STRIP ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-strip)
|
||||
set(CMAKE_FIND_ROOT_PATH "${RISCV_ROOT_PATH}/riscv64-unknown-linux-gnu")
|
||||
set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot")
|
||||
endif()
|
||||
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
|
||||
set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CMAKE_C_FLAGS}")
|
||||
# Prepend the SpacemiT ISA/ABI flags while preserving any CMAKE_CXX_FLAGS supplied by the
# user or the cache (same pattern as CMAKE_C_FLAGS; ${CXX_FLAGS} is not a CMake variable).
set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CMAKE_CXX_FLAGS}")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -latomic")
|
||||
89
docs/build-riscv64-spacemit.md
Normal file
89
docs/build-riscv64-spacemit.md
Normal file
@@ -0,0 +1,89 @@
|
||||
> [!IMPORTANT]
|
||||
> This build documentation applies only to RISC-V SpacemiT SoCs.
|
||||
|
||||
## Build llama.cpp locally (for riscv64)
|
||||
|
||||
1. Prepare Toolchain For RISCV
|
||||
~~~
|
||||
wget https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v1.1.2.tar.xz
|
||||
~~~
|
||||
|
||||
2. Build
|
||||
The build script below relies on RISC-V vector instructions for acceleration. Make sure the `GGML_CPU_RISCV64_SPACEMIT` compilation option is enabled. The only optimization version currently supported is `RISCV64_SPACEMIT_IME1`, which is selected through the `RISCV64_SPACEMIT_IME_SPEC` compilation option. Compiler settings are defined in `cmake/riscv64-spacemit-linux-gnu-gcc.cmake`. Before running CMake, install the RISC-V toolchain and point the build at it with `export RISCV_ROOT_PATH={your_compiler_path}`.
|
||||
```bash
|
||||
|
||||
cmake -B build \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DGGML_CPU_RISCV64_SPACEMIT=ON \
|
||||
-DLLAMA_CURL=OFF \
|
||||
-DGGML_RVV=ON \
|
||||
-DGGML_RV_ZFH=ON \
|
||||
-DGGML_RV_ZICBOP=ON \
|
||||
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
|
||||
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake \
|
||||
-DCMAKE_INSTALL_PREFIX=build/installed
|
||||
|
||||
cmake --build build --parallel $(nproc) --config Release
|
||||
|
||||
pushd build
|
||||
make install
|
||||
popd
|
||||
```
|
||||
|
||||
## Simulation
|
||||
You can use QEMU to perform emulation on non-RISC-V architectures.
|
||||
|
||||
1. Download QEMU
|
||||
~~~
|
||||
wget https://archive.spacemit.com/spacemit-ai/qemu/jdsk-qemu-v0.0.14.tar.gz
|
||||
~~~
|
||||
|
||||
2. Run Simulation
|
||||
After building llama.cpp, you can run the resulting executable under QEMU, for example:
|
||||
~~~
|
||||
export QEMU_ROOT_PATH={your QEMU file path}
|
||||
export RISCV_ROOT_PATH_IME1={your RISC-V compiler path}
|
||||
|
||||
${QEMU_ROOT_PATH}/bin/qemu-riscv64 -L ${RISCV_ROOT_PATH_IME1}/sysroot -cpu max,vlen=256,elen=64,vext_spec=v1.0 ${PWD}/build/bin/llama-cli -m ${PWD}/models/Qwen2.5-0.5B-Instruct-Q4_0.gguf -t 1
|
||||
~~~
|
||||
## Performance
|
||||
#### Quantization Support For Matrix
|
||||
~~~
|
||||
model name : Spacemit(R) X60
|
||||
isa : rv64imafdcv_zicbom_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zfhmin_zca_zcd_zba_zbb_zbc_zbs_zkt_zve32f_zve32x_zve64d_zve64f_zve64x_zvfh_zvfhmin_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt
|
||||
mmu : sv39
|
||||
uarch : spacemit,x60
|
||||
mvendorid : 0x710
|
||||
marchid : 0x8000000058000001
|
||||
~~~
|
||||
|
||||
Q4_0
|
||||
| Model | Size | Params | backend | threads | test | t/s |
|
||||
| -----------| -------- | ------ | ------- | ------- | ---- |------|
|
||||
Qwen2.5 0.5B |403.20 MiB|630.17 M| cpu | 4 | pp512|64.12 ± 0.26|
|
||||
Qwen2.5 0.5B |403.20 MiB|630.17 M| cpu | 4 | tg128|10.03 ± 0.01|
|
||||
Qwen2.5 1.5B |1011.16 MiB| 1.78 B | cpu | 4 | pp512|24.16 ± 0.02|
|
||||
Qwen2.5 1.5B |1011.16 MiB| 1.78 B | cpu | 4 | tg128|3.83 ± 0.06|
|
||||
Qwen2.5 3B | 1.86 GiB | 3.40 B | cpu | 4 | pp512|12.08 ± 0.02|
|
||||
Qwen2.5 3B | 1.86 GiB | 3.40 B | cpu | 4 | tg128|2.23 ± 0.02|
|
||||
|
||||
Q4_1
|
||||
| Model | Size | Params | backend | threads | test | t/s |
|
||||
| -----------| -------- | ------ | ------- | ------- | ---- |------|
|
||||
Qwen2.5 0.5B |351.50 MiB|494.03 M| cpu | 4 | pp512|62.07 ± 0.12|
|
||||
Qwen2.5 0.5B |351.50 MiB|494.03 M| cpu | 4 | tg128|9.91 ± 0.01|
|
||||
Qwen2.5 1.5B |964.06 MiB| 1.54 B | cpu | 4 | pp512|22.95 ± 0.25|
|
||||
Qwen2.5 1.5B |964.06 MiB| 1.54 B | cpu | 4 | tg128|4.01 ± 0.15|
|
||||
Qwen2.5 3B | 1.85 GiB | 3.09 B | cpu | 4 | pp512|11.55 ± 0.16|
|
||||
Qwen2.5 3B | 1.85 GiB | 3.09 B | cpu | 4 | tg128|2.25 ± 0.04|
|
||||
|
||||
|
||||
Q4_K
|
||||
| Model | Size | Params | backend | threads | test | t/s |
|
||||
| -----------| -------- | ------ | ------- | ------- | ---- |------|
|
||||
Qwen2.5 0.5B |462.96 MiB|630.17 M| cpu | 4 | pp512|9.29 ± 0.05|
|
||||
Qwen2.5 0.5B |462.96 MiB|630.17 M| cpu | 4 | tg128|5.67 ± 0.04|
|
||||
Qwen2.5 1.5B | 1.04 GiB | 1.78 B | cpu | 4 | pp512|10.38 ± 0.10|
|
||||
Qwen2.5 1.5B | 1.04 GiB | 1.78 B | cpu | 4 | tg128|3.17 ± 0.08|
|
||||
Qwen2.5 3B | 1.95 GiB | 3.40 B | cpu | 4 | pp512|4.23 ± 0.04|
|
||||
Qwen2.5 3B | 1.95 GiB | 3.40 B | cpu | 4 | tg128|1.73 ± 0.00|
|
||||
@@ -4,8 +4,7 @@ project("ggml" C CXX ASM)
|
||||
### GGML Version
|
||||
set(GGML_VERSION_MAJOR 0)
|
||||
set(GGML_VERSION_MINOR 9)
|
||||
set(GGML_VERSION_PATCH 0)
|
||||
set(GGML_VERSION_DEV "-dev") # "-dev" for development, "" for releases
|
||||
set(GGML_VERSION_PATCH 3)
|
||||
set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")
|
||||
|
||||
find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH)
|
||||
@@ -26,8 +25,8 @@ if(GIT_EXE)
|
||||
)
|
||||
endif()
|
||||
|
||||
# Build the version string with optional -dev suffix and dirty flag
|
||||
set(GGML_VERSION "${GGML_VERSION_BASE}${GGML_VERSION_DEV}")
|
||||
# Build the version string with optional dirty flag
|
||||
set(GGML_VERSION "${GGML_VERSION_BASE}")
|
||||
if(GGML_GIT_DIRTY AND NOT GGML_GIT_DIRTY EQUAL 0)
|
||||
set(GGML_VERSION "${GGML_VERSION}-dirty")
|
||||
endif()
|
||||
|
||||
@@ -135,6 +135,10 @@ static void * dl_get_sym(dl_handle * handle, const char * name) {
|
||||
return p;
|
||||
}
|
||||
|
||||
static const char * dl_error() {
|
||||
return "";
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
using dl_handle = void;
|
||||
@@ -155,6 +159,11 @@ static void * dl_get_sym(dl_handle * handle, const char * name) {
|
||||
return dlsym(handle, name);
|
||||
}
|
||||
|
||||
static const char * dl_error() {
|
||||
const char *rslt = dlerror();
|
||||
return rslt != nullptr ? rslt : "";
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
||||
@@ -240,7 +249,7 @@ struct ggml_backend_registry {
|
||||
dl_handle_ptr handle { dl_load_library(path) };
|
||||
if (!handle) {
|
||||
if (!silent) {
|
||||
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(path).c_str());
|
||||
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(path).c_str(), dl_error());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@@ -530,7 +539,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
|
||||
if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
|
||||
dl_handle_ptr handle { dl_load_library(entry) };
|
||||
if (!handle && !silent) {
|
||||
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(entry.path()).c_str());
|
||||
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(entry.path()).c_str(), dl_error());
|
||||
}
|
||||
if (handle) {
|
||||
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
|
||||
|
||||
@@ -74,7 +74,7 @@ if (BLAS_FOUND)
|
||||
|
||||
target_compile_options(ggml-blas PRIVATE ${BLAS_LINKER_FLAGS})
|
||||
|
||||
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
|
||||
if ("${BLAS_INCLUDE_DIRS}" MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
|
||||
add_compile_definitions(GGML_BLAS_USE_MKL)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -439,6 +439,15 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
||||
ggml-cpu/arch/riscv/quants.c
|
||||
ggml-cpu/arch/riscv/repack.cpp
|
||||
)
|
||||
if (GGML_CPU_RISCV64_SPACEMIT)
|
||||
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_RISCV64_SPACEMIT ${RISCV64_SPACEMIT_IME_SPEC})
|
||||
list(APPEND GGML_CPU_SOURCES
|
||||
ggml-cpu/spacemit/ime.cpp
|
||||
ggml-cpu/spacemit/ime.h
|
||||
ggml-cpu/spacemit/ime1_kernels.cpp
|
||||
ggml-cpu/spacemit/ime_kernels.h
|
||||
)
|
||||
endif()
|
||||
set(MARCH_STR "rv64gc")
|
||||
if (GGML_RV_ZFH)
|
||||
string(APPEND MARCH_STR "_zfh")
|
||||
@@ -504,9 +513,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
||||
|
||||
# Fetch KleidiAI sources:
|
||||
include(FetchContent)
|
||||
set(KLEIDIAI_COMMIT_TAG "v1.13.0")
|
||||
set(KLEIDIAI_COMMIT_TAG "v1.14.0")
|
||||
set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
|
||||
set(KLEIDIAI_ARCHIVE_MD5 "d82a8de939d9814621a5ba23907bdac1")
|
||||
set(KLEIDIAI_ARCHIVE_MD5 "45e110675d93f99f82c23a1afcca76bc")
|
||||
|
||||
if (POLICY CMP0135)
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
@@ -583,6 +592,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
||||
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c
|
||||
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c
|
||||
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c
|
||||
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa_asm.S
|
||||
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c
|
||||
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p2vlx2b_f32_x32_sme.c
|
||||
${KLEIDIAI_SRC}/kai/kai_common_sme_asm.S)
|
||||
|
||||
@@ -18,6 +18,10 @@
|
||||
# include "kleidiai/kleidiai.h"
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
|
||||
# include "spacemit/ime.h"
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32)
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
# ifndef NOMINMAX
|
||||
@@ -45,6 +49,12 @@ std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_type
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
|
||||
if (ggml_backend_cpu_riscv64_spacemit_buffer_type()) {
|
||||
bufts.push_back(ggml_backend_cpu_riscv64_spacemit_buffer_type());
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_CPU_KLEIDIAI
|
||||
if (ggml_backend_cpu_kleidiai_buffer_type()) {
|
||||
bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type());
|
||||
|
||||
@@ -87,15 +87,38 @@ static inline int64_t ggml_ne(const ggml_tensor * tensor, int dim) {
|
||||
return tensor->ne[dim];
|
||||
}
|
||||
|
||||
template <typename Variant, typename Ret, typename... Args, std::size_t... Is>
|
||||
constexpr bool variant_any_invocable_impl(std::index_sequence<Is...>) {
|
||||
using V = std::remove_reference_t<Variant>;
|
||||
return (std::is_invocable_r_v<
|
||||
Ret,
|
||||
std::variant_alternative_t<Is, V>,
|
||||
Args...> || ...);
|
||||
}
|
||||
|
||||
template <typename Variant, typename Ret, typename... Args>
|
||||
constexpr bool variant_any_invocable_v =
|
||||
variant_any_invocable_impl<Variant, Ret, Args...>(
|
||||
std::make_index_sequence<
|
||||
std::variant_size_v<std::remove_reference_t<Variant>>>{});
|
||||
|
||||
template<typename Ret, typename Variant, typename... Args>
|
||||
static Ret variant_call(const Variant & var, Args&&... args) {
|
||||
return std::visit([&](auto&& func) -> Ret {
|
||||
if constexpr (std::is_invocable_r_v<Ret, decltype(func), Args...>) {
|
||||
return func(std::forward<Args>(args)...);
|
||||
} else {
|
||||
throw std::runtime_error("Invalid function type in variant_call");
|
||||
}
|
||||
}, var);
|
||||
static inline Ret variant_call(Variant && var, Args&&... args) {
|
||||
static_assert(variant_any_invocable_v<std::remove_reference_t<Variant>, Ret, Args...>,
|
||||
"No alternative in Variant is invocable with the provided arguments and return type.");
|
||||
|
||||
return std::visit(
|
||||
[&](auto && f) -> Ret {
|
||||
using F = std::decay_t<decltype(f)>;
|
||||
if constexpr (std::is_invocable_r_v<Ret, F, Args...>) {
|
||||
return std::invoke(std::forward<decltype(f)>(f), std::forward<Args>(args)...);
|
||||
} else {
|
||||
GGML_ABORT("Invalid function type in variant_call");
|
||||
GGML_UNREACHABLE();
|
||||
}
|
||||
},
|
||||
std::forward<Variant>(var)
|
||||
);
|
||||
}
|
||||
|
||||
namespace ggml::cpu::kleidiai {
|
||||
@@ -138,7 +161,10 @@ class tensor_traits : public ggml::cpu::tensor_traits {
|
||||
if (kernels->rhs_type == GGML_TYPE_Q4_0) {
|
||||
size = variant_call<size_t>(lhs_info->packed_size, m, k, QK4_0, mr, kr, sr);
|
||||
} else if (kernels->rhs_type == GGML_TYPE_F16) {
|
||||
size = variant_call<size_t>(lhs_info->packed_size, m, k, mr, kr, sr) +
|
||||
const int64_t lhs_batch_size0 = op->src[1]->ne[2];
|
||||
const int64_t rhs_batch_size0 = op->src[0]->ne[2];
|
||||
const int64_t r = lhs_batch_size0 / rhs_batch_size0;
|
||||
size = variant_call<size_t>(lhs_info->packed_size, m * r, k, mr, kr, sr) +
|
||||
variant_call<size_t>(kernels->rhs_info.packed_size, n, k) +
|
||||
k * n * sizeof(float) + n * sizeof(float);
|
||||
} else {
|
||||
@@ -148,7 +174,6 @@ class tensor_traits : public ggml::cpu::tensor_traits {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * dst) override {
|
||||
if (dst->op == GGML_OP_MUL_MAT) {
|
||||
if (dst->src[0]->type == GGML_TYPE_Q4_0) {
|
||||
@@ -165,8 +190,6 @@ class tensor_traits : public ggml::cpu::tensor_traits {
|
||||
}
|
||||
|
||||
bool compute_forward_fp16(ggml_compute_params * params, struct ggml_tensor * dst) {
|
||||
static std::atomic_flag first_to_arrive = ATOMIC_FLAG_INIT;
|
||||
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
const ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
@@ -175,7 +198,7 @@ class tensor_traits : public ggml::cpu::tensor_traits {
|
||||
ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, dst);
|
||||
GGML_ASSERT(kernels);
|
||||
|
||||
bool is_gemv = src1->ne[1] == 1;
|
||||
const bool is_gemv = src1->ne[1] == 1;
|
||||
kernel_info * kernel = is_gemv ? &kernels->gemv : &kernels->gemm;
|
||||
lhs_packing_info * lhs_info = is_gemv ? &kernels->gemv_lhs_info : &kernels->gemm_lhs_info;
|
||||
GGML_ASSERT(kernel);
|
||||
@@ -185,27 +208,30 @@ class tensor_traits : public ggml::cpu::tensor_traits {
|
||||
|
||||
const int64_t lhs_batch_size0 = ne12;
|
||||
const int64_t rhs_batch_size0 = ne02;
|
||||
const int64_t batch_size = rhs_batch_size0;
|
||||
const int64_t batch_size = lhs_batch_size0;
|
||||
|
||||
GGML_ASSERT(rhs_batch_size0 > 0);
|
||||
GGML_ASSERT(lhs_batch_size0 % rhs_batch_size0 == 0);
|
||||
const int64_t r = lhs_batch_size0 / rhs_batch_size0;
|
||||
|
||||
const int64_t m = ne11 * r;
|
||||
const int64_t n = ne01;
|
||||
const int64_t k = ne00;
|
||||
const int64_t m_group = ne11;
|
||||
const int64_t m = m_group;
|
||||
const int64_t n = ne01;
|
||||
const int64_t k = ne00;
|
||||
|
||||
const size_t lhs_stride = src1->nb[1];
|
||||
const size_t rhs_stride = src0->nb[1];
|
||||
const size_t dst_stride = dst->nb[1];
|
||||
|
||||
const int64_t mr = static_cast<int64_t>(kernel->get_mr());
|
||||
const int64_t nr = static_cast<int64_t>(kernel->get_nr());
|
||||
const int64_t kr = static_cast<int64_t>(kernel->get_kr());
|
||||
const int64_t sr = static_cast<int64_t>(kernel->get_sr());
|
||||
const int64_t mr = (int64_t) kernel->get_mr();
|
||||
const int64_t nr = (int64_t) kernel->get_nr();
|
||||
const int64_t kr = (int64_t) kernel->get_kr();
|
||||
const int64_t sr = (int64_t) kernel->get_sr();
|
||||
|
||||
const size_t lhs_packed_size = variant_call<size_t>(lhs_info->packed_size, m, k, mr, kr, sr);
|
||||
const size_t rhs_packed_size = variant_call<size_t>(kernels->rhs_info.packed_size, n, k);
|
||||
const size_t kxn_size = k * n * sizeof(float);
|
||||
const size_t bias_size = n * sizeof(float);
|
||||
const size_t lhs_packed_size = variant_call<size_t>(lhs_info->packed_size, (size_t)m, (size_t)k, (size_t)mr, (size_t)kr, (size_t)sr);
|
||||
const size_t rhs_packed_size = variant_call<size_t>(kernels->rhs_info.packed_size, (size_t)n, (size_t)k);
|
||||
const size_t kxn_size = (size_t)k * (size_t)n * sizeof(float);
|
||||
const size_t bias_size = (size_t)n * sizeof(float);
|
||||
|
||||
const size_t wsize_required = lhs_packed_size + rhs_packed_size + kxn_size + bias_size;
|
||||
GGML_ASSERT(wsize_required <= params->wsize);
|
||||
@@ -216,82 +242,102 @@ class tensor_traits : public ggml::cpu::tensor_traits {
|
||||
uint8_t * bias = rhs_kxn + kxn_size;
|
||||
|
||||
for (int64_t batch_idx = 0; batch_idx < batch_size; ++batch_idx) {
|
||||
const uint8_t * lhs_batch = static_cast<const uint8_t *>(src1->data) + batch_idx * m * lhs_stride;
|
||||
const uint8_t * rhs_batch = static_cast<const uint8_t *>(src0->data) + batch_idx * n * rhs_stride;
|
||||
uint8_t * dst_batch = static_cast<uint8_t *>(dst->data) + batch_idx * m * dst_stride;
|
||||
const int64_t rhs_batch_idx = batch_idx / r;
|
||||
const uint8_t * rhs_batch_base = static_cast<const uint8_t *>(src0->data) + rhs_batch_idx * src0->nb[2];
|
||||
uint8_t * dst_batch_base = static_cast<uint8_t *>(dst->data) + batch_idx * dst->nb[2];
|
||||
|
||||
// LHS packing
|
||||
// LHS packing (threaded over m, honoring mr alignment and KV groups)
|
||||
{
|
||||
const int64_t m_roundup_mr = kai_roundup(m, mr);
|
||||
const int64_t num_threads = KAI_MIN(m_roundup_mr / mr, nth);
|
||||
|
||||
if (ith < num_threads) {
|
||||
const int64_t num_m_per_thread0 = round_down(m_roundup_mr / num_threads, mr);
|
||||
const int64_t num_m_per_thread0 = round_down((size_t)(m_roundup_mr / num_threads), (size_t)mr);
|
||||
const int64_t num_m_per_threadN_1 = m - (num_threads - 1) * num_m_per_thread0;
|
||||
|
||||
const int64_t m_start = ith * num_m_per_thread0;
|
||||
const int64_t num_m_per_thread = (ith == num_threads - 1) ? num_m_per_threadN_1 : num_m_per_thread0;
|
||||
const int64_t m_start = ith * num_m_per_thread0;
|
||||
const int64_t m_count = (ith == num_threads - 1) ? num_m_per_threadN_1 : num_m_per_thread0;
|
||||
|
||||
const size_t lhs_offset = variant_call<size_t>(kernels->gemm.get_lhs_offset, m_start, lhs_stride);
|
||||
const size_t lhs_packed_offset = variant_call<size_t>(lhs_info->get_packed_offset, m_start, k, mr, kr, sr);
|
||||
// Base packed offset (aligned) and per-row stride in bytes
|
||||
const size_t base_packed_off = variant_call<size_t>(
|
||||
lhs_info->get_packed_offset, (size_t)m_start, (size_t)k, (size_t)mr, (size_t)kr, (size_t)sr);
|
||||
const size_t next_block_off = variant_call<size_t>(
|
||||
lhs_info->get_packed_offset, (size_t)(m_start + mr), (size_t)k, (size_t)mr, (size_t)kr, (size_t)sr);
|
||||
const size_t row_stride_bytes = (next_block_off - base_packed_off) / (size_t)mr;
|
||||
|
||||
const void * src_ptr = static_cast<const uint8_t *>(lhs_batch) + lhs_offset;
|
||||
void * dst_ptr = static_cast<uint8_t *>(lhs_packed) + lhs_packed_offset;
|
||||
int64_t remaining = m_count;
|
||||
int64_t cur = m_start;
|
||||
|
||||
variant_call<void>(lhs_info->pack_func, num_m_per_thread, k, mr, kr, sr, 0, src_ptr, lhs_stride, dst_ptr);
|
||||
while (remaining > 0) {
|
||||
const int64_t row_in_group = cur;
|
||||
const int64_t avail = m_group - row_in_group;
|
||||
const int64_t take = std::min(avail, remaining);
|
||||
|
||||
const uint8_t * lhs_batch_base = static_cast<const uint8_t *>(src1->data) + batch_idx * src1->nb[2];
|
||||
const void * src_ptr = lhs_batch_base + (size_t)row_in_group * lhs_stride;
|
||||
const size_t dst_off = base_packed_off + (size_t)(cur - m_start) * row_stride_bytes;
|
||||
void * dst_ptr = lhs_packed + dst_off;
|
||||
|
||||
variant_call<void>(lhs_info->pack_func,
|
||||
(size_t)take, (size_t)k, (size_t)mr, (size_t)kr, (size_t)sr,
|
||||
/*m_idx_start*/ 0, src_ptr, lhs_stride, dst_ptr);
|
||||
|
||||
cur += take;
|
||||
remaining -= take;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// RHS packing
|
||||
if (first_to_arrive.test_and_set(std::memory_order_acquire) == false) {
|
||||
// First thread to reach this point handles RHS packing
|
||||
memset(bias, 0, n * sizeof(float));
|
||||
transpose_f32kxn_f16nxk(n, k, reinterpret_cast<float *>(rhs_kxn),
|
||||
reinterpret_cast<const uint16_t *>(rhs_batch), rhs_stride);
|
||||
// RHS packing (single thread), then synchronize
|
||||
if (ith == 0) {
|
||||
memset(bias, 0, (size_t)n * sizeof(float));
|
||||
transpose_f32kxn_f16nxk((size_t)n, (size_t)k,
|
||||
reinterpret_cast<float *>(rhs_kxn),
|
||||
reinterpret_cast<const uint16_t *>(rhs_batch_base),
|
||||
rhs_stride);
|
||||
|
||||
variant_call<void>(kernels->rhs_info.pack_func, 1, n, k, nr, kr, sr, n * sizeof(float),
|
||||
rhs_kxn, bias, nullptr, rhs_packed, 0, nullptr);
|
||||
variant_call<void>(kernels->rhs_info.pack_func,
|
||||
/*num_groups*/ 1, (size_t)n, (size_t)k, (size_t)nr, (size_t)kr, (size_t)sr,
|
||||
/*rhs_stride (bytes)*/ (size_t)(n * sizeof(float)),
|
||||
rhs_kxn, bias, nullptr, rhs_packed, /*extra_bytes*/ 0, /*params*/ nullptr);
|
||||
}
|
||||
|
||||
ggml_barrier(params->threadpool);
|
||||
|
||||
first_to_arrive.clear(std::memory_order_release);
|
||||
|
||||
// Perform the matmul
|
||||
// Matmul (threaded over n)
|
||||
{
|
||||
const int64_t m_to_process = m;
|
||||
const int64_t m_start = 0;
|
||||
|
||||
const int64_t n_step = static_cast<int64_t>(kernel->get_n_step());
|
||||
int64_t num_threads = KAI_MIN(n / n_step, nth);
|
||||
if (num_threads <= 0) {
|
||||
num_threads = 1;
|
||||
const int64_t n_step = (int64_t) kernel->get_n_step();
|
||||
int64_t num_threads_n = KAI_MIN(n / n_step, nth);
|
||||
if (num_threads_n <= 0) {
|
||||
num_threads_n = 1;
|
||||
}
|
||||
|
||||
if (ith < num_threads) {
|
||||
const int64_t num_n_per_thread0 = round_down(n / num_threads, n_step);
|
||||
const int64_t num_n_per_threadN_1 = n - (num_threads - 1) * num_n_per_thread0;
|
||||
if (ith < num_threads_n) {
|
||||
const int64_t num_n_per_thread0 = round_down((size_t)(n / num_threads_n), (size_t)n_step);
|
||||
const int64_t num_n_per_threadN_1 = n - (num_threads_n - 1) * num_n_per_thread0;
|
||||
|
||||
const int64_t n_start = ith * num_n_per_thread0;
|
||||
const int64_t n_to_process = (ith == num_threads - 1) ? num_n_per_threadN_1 : num_n_per_thread0;
|
||||
const int64_t n_to_process = (ith == num_threads_n - 1) ? num_n_per_threadN_1 : num_n_per_thread0;
|
||||
|
||||
const size_t lhs_packed_offset = variant_call<size_t>(kernel->get_lhs_offset, m_start, k);
|
||||
const size_t rhs_packed_offset = variant_call<size_t>(kernel->get_rhs_packed_offset, n_start, k);
|
||||
const size_t dst_offset = kernel->get_dst_offset(m_start, n_start, dst_stride);
|
||||
// LHS packed base at row 0 (consistent with packing above)
|
||||
const size_t lhs_packed_offset0 = variant_call<size_t>(
|
||||
lhs_info->get_packed_offset, (size_t)0, (size_t)k, (size_t)mr, (size_t)kr, (size_t)sr);
|
||||
const size_t rhs_packed_offset = variant_call<size_t>(kernel->get_rhs_packed_offset, (size_t)n_start, (size_t)k);
|
||||
const size_t dst_offset = kernel->get_dst_offset((size_t)0, (size_t)n_start, dst_stride);
|
||||
|
||||
const void * lhs_ptr = lhs_packed + lhs_packed_offset;
|
||||
const void * lhs_ptr = lhs_packed + lhs_packed_offset0;
|
||||
const void * rhs_ptr = rhs_packed + rhs_packed_offset;
|
||||
float * dst_ptr = reinterpret_cast<float *>(dst_batch + dst_offset);
|
||||
float * dst_ptr = reinterpret_cast<float *>(dst_batch_base + dst_offset);
|
||||
|
||||
variant_call<void>(kernel->run_kernel, m_to_process, n_to_process, k, lhs_ptr, rhs_ptr, dst_ptr, dst_stride, sizeof(float), -FLT_MAX, FLT_MAX);
|
||||
variant_call<void>(kernel->run_kernel,
|
||||
(size_t)m, (size_t)n_to_process, (size_t)k,
|
||||
lhs_ptr, rhs_ptr,
|
||||
dst_ptr, dst_stride, sizeof(float),
|
||||
-FLT_MAX, FLT_MAX);
|
||||
}
|
||||
}
|
||||
|
||||
if (batch_idx != batch_size - 1) {
|
||||
// This barrier is necessary when the batch size is larger than 1. While processing a batch,
|
||||
// the work data buffer (params->wdata) is used as temporary storage which means that only
|
||||
// a single batch can be processed at any given time. No barrier is needed for the last
|
||||
// batch since GGML inserts a barrier between the execution of every operator.
|
||||
ggml_barrier(params->threadpool);
|
||||
}
|
||||
}
|
||||
|
||||
1024
ggml/src/ggml-cpu/spacemit/ime.cpp
Normal file
1024
ggml/src/ggml-cpu/spacemit/ime.cpp
Normal file
File diff suppressed because it is too large
Load Diff
13
ggml/src/ggml-cpu/spacemit/ime.h
Normal file
13
ggml/src/ggml-cpu/spacemit/ime.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml-alloc.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
ggml_backend_buffer_type_t ggml_backend_cpu_riscv64_spacemit_buffer_type(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
3196
ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp
Normal file
3196
ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp
Normal file
File diff suppressed because it is too large
Load Diff
26
ggml/src/ggml-cpu/spacemit/ime_kernels.h
Normal file
26
ggml/src/ggml-cpu/spacemit/ime_kernels.h
Normal file
@@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace sqnbitgemm_spacemit_ime {
|
||||
namespace ime1 {
|
||||
size_t gemm_kernel_i8i4(size_t blk_len,
|
||||
const std::byte * quant_a_ptr,
|
||||
const std::byte * quant_b_data,
|
||||
const float * quant_b_scale,
|
||||
const std::byte * quant_b_zp,
|
||||
float * c_ptr,
|
||||
size_t count_m,
|
||||
size_t count_n,
|
||||
size_t count_k,
|
||||
size_t block_count_k,
|
||||
size_t ldc,
|
||||
const float * bias,
|
||||
const size_t scale_stride);
|
||||
|
||||
void quantize_a_row_i8(size_t blk_len, const float * a_ptr, size_t count_k, std::byte * quant_a_ptr);
|
||||
|
||||
void quantize_a_4row_i8(size_t blk_len, const float * a_ptr, size_t count_k, std::byte * quant_a_ptr);
|
||||
|
||||
} // namespace ime1
|
||||
} // namespace sqnbitgemm_spacemit_ime
|
||||
@@ -3639,9 +3639,11 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
||||
case GGML_OP_CONV_TRANSPOSE_2D:
|
||||
case GGML_OP_POOL_2D:
|
||||
case GGML_OP_SUM:
|
||||
case GGML_OP_ARGSORT:
|
||||
case GGML_OP_ACC:
|
||||
return true;
|
||||
case GGML_OP_ARGSORT:
|
||||
// TODO: Support arbitrary column width
|
||||
return op->src[0]->ne[0] <= 1024;
|
||||
case GGML_OP_SUM_ROWS:
|
||||
case GGML_OP_MEAN:
|
||||
case GGML_OP_GROUP_NORM:
|
||||
|
||||
@@ -683,9 +683,11 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
||||
(ggml_get_op_params_i32(op, 4) == 0) && (ggml_get_op_params_i32(op, 6) == 0);
|
||||
case GGML_OP_PAD_REFLECT_1D:
|
||||
case GGML_OP_TIMESTEP_EMBEDDING:
|
||||
case GGML_OP_ARGSORT:
|
||||
case GGML_OP_LEAKY_RELU:
|
||||
return op->src[0]->type == GGML_TYPE_F32;
|
||||
case GGML_OP_ARGSORT:
|
||||
// TODO: Support arbitrary column width
|
||||
return op->src[0]->ne[0] <= 1024;
|
||||
case GGML_OP_ARANGE:
|
||||
return true;
|
||||
case GGML_OP_FLASH_ATTN_EXT:
|
||||
|
||||
@@ -67,30 +67,48 @@ layout (binding = 5) writeonly buffer O {D_TYPE data_o[];};
|
||||
#if defined(A_TYPE_PACKED16)
|
||||
#define BINDING_IDX_K 0
|
||||
#define BINDING_IDX_V 1
|
||||
layout (binding = 1) readonly buffer KV_PACKED16 {A_TYPE_PACKED16 data_packed16[];} kv_packed[2];
|
||||
layout (binding = 1) readonly buffer K_PACKED16 {A_TYPE_PACKED16 k_data_packed16[];} k_packed;
|
||||
layout (binding = 2) readonly buffer V_PACKED16 {A_TYPE_PACKED16 v_data_packed16[];} v_packed;
|
||||
#endif
|
||||
|
||||
#if defined(DATA_A_Q4_0)
|
||||
#define BLOCK_BYTE_SIZE 18
|
||||
|
||||
vec4 dequantize4(uint ib, uint iqs, uint a_offset, uint binding_idx) {
|
||||
uint vui_lo = uint(kv_packed[binding_idx].data_packed16[a_offset + ib].qs[(iqs & 0xF) / 2 + 0]);
|
||||
uint vui_hi = uint(kv_packed[binding_idx].data_packed16[a_offset + ib].qs[(iqs & 0xF) / 2 + 1]);
|
||||
uint shift = (iqs & 0x10) >> 2;
|
||||
vui_lo >>= shift;
|
||||
vui_hi >>= shift;
|
||||
if (binding_idx == BINDING_IDX_K) {
|
||||
uint vui_lo = uint(k_packed.k_data_packed16[a_offset + ib].qs[(iqs & 0xF) / 2 + 0]);
|
||||
uint vui_hi = uint(k_packed.k_data_packed16[a_offset + ib].qs[(iqs & 0xF) / 2 + 1]);
|
||||
uint shift = (iqs & 0x10) >> 2;
|
||||
vui_lo >>= shift;
|
||||
vui_hi >>= shift;
|
||||
|
||||
return float(kv_packed[binding_idx].data_packed16[a_offset + ib].d) * (vec4(vui_lo & 0xF, (vui_lo >> 8) & 0xF, vui_hi & 0xF, (vui_hi >> 8) & 0xF) - 8.0f);
|
||||
return float(k_packed.k_data_packed16[a_offset + ib].d) * (vec4(vui_lo & 0xF, (vui_lo >> 8) & 0xF, vui_hi & 0xF, (vui_hi >> 8) & 0xF) - 8.0f);
|
||||
} else {
|
||||
uint vui_lo = uint(v_packed.v_data_packed16[a_offset + ib].qs[(iqs & 0xF) / 2 + 0]);
|
||||
uint vui_hi = uint(v_packed.v_data_packed16[a_offset + ib].qs[(iqs & 0xF) / 2 + 1]);
|
||||
uint shift = (iqs & 0x10) >> 2;
|
||||
vui_lo >>= shift;
|
||||
vui_hi >>= shift;
|
||||
|
||||
return float(v_packed.v_data_packed16[a_offset + ib].d) * (vec4(vui_lo & 0xF, (vui_lo >> 8) & 0xF, vui_hi & 0xF, (vui_hi >> 8) & 0xF) - 8.0f);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(DATA_A_Q8_0)
|
||||
#define BLOCK_BYTE_SIZE 34
|
||||
vec4 dequantize4(uint ib, uint iqs, uint a_offset, uint binding_idx) {
|
||||
const i8vec2 v0 = unpack8(int32_t(kv_packed[binding_idx].data_packed16[a_offset + ib].qs[iqs / 2])).xy; // vec4 used due to #12147
|
||||
const i8vec2 v1 = unpack8(int32_t(kv_packed[binding_idx].data_packed16[a_offset + ib].qs[iqs / 2 + 1])).xy;
|
||||
if (binding_idx == BINDING_IDX_K) {
|
||||
const i8vec2 v0 = unpack8(int32_t(k_packed.k_data_packed16[a_offset + ib].qs[iqs / 2])).xy; // vec4 used due to #12147
|
||||
const i8vec2 v1 = unpack8(int32_t(k_packed.k_data_packed16[a_offset + ib].qs[iqs / 2 + 1])).xy;
|
||||
|
||||
return float(kv_packed[binding_idx].data_packed16[a_offset + ib].d) * vec4(v0.x, v0.y, v1.x, v1.y);
|
||||
return float(k_packed.k_data_packed16[a_offset + ib].d) * vec4(v0.x, v0.y, v1.x, v1.y);
|
||||
} else {
|
||||
const i8vec2 v0 = unpack8(int32_t(v_packed.v_data_packed16[a_offset + ib].qs[iqs / 2])).xy; // vec4 used due to #12147
|
||||
const i8vec2 v1 = unpack8(int32_t(v_packed.v_data_packed16[a_offset + ib].qs[iqs / 2 + 1])).xy;
|
||||
|
||||
return float(v_packed.v_data_packed16[a_offset + ib].d) * vec4(v0.x, v0.y, v1.x, v1.y);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -3687,6 +3687,7 @@ struct ggml_tensor * ggml_set_rows(
|
||||
result->op = GGML_OP_SET_ROWS;
|
||||
result->src[0] = b;
|
||||
result->src[1] = c;
|
||||
result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931)
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -1 +1 @@
|
||||
978f6e1993f2eeb4e99b63d4e70b4401c0a2dae2
|
||||
83a15e113b130337a892fb6575c337754557d56f
|
||||
|
||||
@@ -2140,6 +2140,27 @@ struct test_set_rows : public test_case {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double max_nmse_err() override {
|
||||
if (type == GGML_TYPE_Q4_0 || type == GGML_TYPE_Q4_1 || type == GGML_TYPE_IQ4_NL ||
|
||||
type == GGML_TYPE_Q5_0 || type == GGML_TYPE_Q5_1 || type == GGML_TYPE_Q8_0) {
|
||||
// estimate what the max nmse error would be if one quantized value is
|
||||
// off by one. The test values are distributed in [-1,1], so it'll be
|
||||
// roughly (2.0 / 2^bits)^2, divided by the mean square value of the reference,
|
||||
// which is roughly 0.25 times the number of elements.
|
||||
double err_estimate = 1.0f/8.0f;
|
||||
if (type == GGML_TYPE_Q5_0 || type == GGML_TYPE_Q5_1) {
|
||||
err_estimate /= 2.0f;
|
||||
}
|
||||
if (type == GGML_TYPE_Q8_0) {
|
||||
err_estimate /= 8.0f;
|
||||
}
|
||||
err_estimate *= err_estimate;
|
||||
err_estimate /= 0.25f*float(ne[0] * r * ne[2]*nr23[0] * ne[3]*nr23[1]);
|
||||
return err_estimate;
|
||||
}
|
||||
return 1e-7;
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_ARGMAX
|
||||
@@ -2430,6 +2451,30 @@ struct test_cpy : public test_case {
|
||||
}
|
||||
|
||||
double max_nmse_err() override {
|
||||
if (type_src == type_dst) {
|
||||
return 0.0;
|
||||
}
|
||||
if (type_dst == GGML_TYPE_Q4_0 || type_dst == GGML_TYPE_Q4_1 || type_dst == GGML_TYPE_IQ4_NL ||
|
||||
type_dst == GGML_TYPE_Q5_0 || type_dst == GGML_TYPE_Q5_1 || type_dst == GGML_TYPE_Q8_0) {
|
||||
// estimate what the max nmse error would be if one quantized value is
|
||||
// off by one. The test values are distributed in [-150,150], so it'll be
|
||||
// roughly (150*2.0 / 2^bits)^2, divided by the mean square value of the reference,
|
||||
// which is roughly 0.25*150^2 times the number of elements.
|
||||
double err_estimate = 1.0f/8.0f * 150.0f;
|
||||
if (type_dst == GGML_TYPE_IQ4_NL) {
|
||||
// iq4_nl values are a bit more spread out
|
||||
err_estimate *= 2.0f;
|
||||
}
|
||||
if (type_dst == GGML_TYPE_Q5_0 || type_dst == GGML_TYPE_Q5_1) {
|
||||
err_estimate /= 2.0f;
|
||||
}
|
||||
if (type_dst == GGML_TYPE_Q8_0) {
|
||||
err_estimate /= 8.0f;
|
||||
}
|
||||
err_estimate *= err_estimate;
|
||||
err_estimate /= (150.0f*150.0f*0.25f)*float(ne[0] * ne[1] * ne[2] * ne[3]);
|
||||
return err_estimate;
|
||||
}
|
||||
return 1e-6;
|
||||
}
|
||||
|
||||
@@ -6567,6 +6612,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {16, 10, 10, 10}, order));
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {60, 10, 10, 10}, order)); // qwen
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {1024, 1, 1, 1}, order));
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {16384, 1, 1, 1}, order)); // bailingmoe2 (group selection)
|
||||
}
|
||||
|
||||
for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR}) {
|
||||
|
||||
@@ -707,6 +707,10 @@ int main(int argc, char ** argv) {
|
||||
|
||||
embd.push_back(id);
|
||||
|
||||
if (params.conversation_mode && !waiting_for_first_input && !llama_vocab_is_eog(vocab, id)) {
|
||||
assistant_ss << common_token_to_piece(ctx, id, false);
|
||||
}
|
||||
|
||||
// echo this to console
|
||||
input_echo = true;
|
||||
|
||||
@@ -824,11 +828,7 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
}
|
||||
|
||||
// if current token is not EOG, we add it to current assistant message
|
||||
if (params.conversation_mode && !waiting_for_first_input) {
|
||||
const auto id = common_sampler_last(smpl);
|
||||
assistant_ss << common_token_to_piece(ctx, id, false);
|
||||
|
||||
if (!prompt.empty()) {
|
||||
prompt.clear();
|
||||
is_interacting = false;
|
||||
|
||||
@@ -1931,11 +1931,13 @@ static void kl_divergence(llama_context * ctx, const common_params & params) {
|
||||
LOG("Maximum KLD: %10.6f\n", kld_values.back());
|
||||
LOG("99.9%% KLD: %10.6f\n", percentile(kld_values, 0.999f));
|
||||
LOG("99.0%% KLD: %10.6f\n", percentile(kld_values, 0.990f));
|
||||
LOG("95.0%% KLD: %10.6f\n", percentile(kld_values, 0.950f));
|
||||
LOG("90.0%% KLD: %10.6f\n", percentile(kld_values, 0.900f));
|
||||
LOG("Median KLD: %10.6f\n", kld_median);
|
||||
LOG("10.0%% KLD: %10.6f\n", percentile(kld_values, 0.100f));
|
||||
LOG(" 5.0%% KLD: %10.6f\n", percentile(kld_values, 0.050f));
|
||||
LOG(" 1.0%% KLD: %10.6f\n", percentile(kld_values, 0.010f));
|
||||
LOG(" 0.1%% KLD: %10.6f\n", percentile(kld_values, 0.001f));
|
||||
LOG("Minimum KLD: %10.6f\n", kld_values.front());
|
||||
|
||||
LOG("\n");
|
||||
|
||||
Binary file not shown.
@@ -39,6 +39,7 @@
|
||||
--sidebar-ring: oklch(0.708 0 0);
|
||||
--code-background: oklch(0.225 0 0);
|
||||
--code-foreground: oklch(0.875 0 0);
|
||||
--layer-popover: 1000000;
|
||||
}
|
||||
|
||||
.dark {
|
||||
|
||||
@@ -362,7 +362,8 @@
|
||||
|
||||
<Dialog.Root {open} onOpenChange={handleClose}>
|
||||
<Dialog.Content
|
||||
class="z-999999 flex h-[100vh] flex-col gap-0 rounded-none p-0 md:h-[64vh] md:rounded-lg"
|
||||
class="z-999999 flex h-[100dvh] max-h-[100dvh] min-h-[100dvh] flex-col gap-0 rounded-none p-0
|
||||
md:h-[64vh] md:max-h-[64vh] md:min-h-0 md:rounded-lg"
|
||||
style="max-width: 48rem;"
|
||||
>
|
||||
<div class="flex flex-1 flex-col overflow-hidden md:flex-row">
|
||||
@@ -441,7 +442,7 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<ScrollArea class="max-h-[calc(100vh-13.5rem)] flex-1">
|
||||
<ScrollArea class="max-h-[calc(100dvh-13.5rem)] flex-1 md:max-h-[calc(100vh-13.5rem)]">
|
||||
<div class="space-y-6 p-4 md:p-6">
|
||||
<div>
|
||||
<div class="mb-6 flex hidden items-center gap-2 border-b border-border/30 pb-6 md:flex">
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
import * as Select from '$lib/components/ui/select';
|
||||
import { Textarea } from '$lib/components/ui/textarea';
|
||||
import { SETTING_CONFIG_DEFAULT, SETTING_CONFIG_INFO } from '$lib/constants/settings-config';
|
||||
import { IsMobile } from '$lib/hooks/is-mobile.svelte';
|
||||
import { supportsVision } from '$lib/stores/server.svelte';
|
||||
import type { Component } from 'svelte';
|
||||
|
||||
@@ -17,8 +16,6 @@
|
||||
}
|
||||
|
||||
let { fields, localConfig, onConfigChange, onThemeChange }: Props = $props();
|
||||
|
||||
let isMobile = $state(new IsMobile());
|
||||
</script>
|
||||
|
||||
{#each fields as field (field.key)}
|
||||
@@ -30,10 +27,10 @@
|
||||
|
||||
<Input
|
||||
id={field.key}
|
||||
value={String(localConfig[field.key] || '')}
|
||||
value={String(localConfig[field.key] ?? '')}
|
||||
onchange={(e) => onConfigChange(field.key, e.currentTarget.value)}
|
||||
placeholder={`Default: ${SETTING_CONFIG_DEFAULT[field.key] || 'none'}`}
|
||||
class={isMobile ? 'w-full' : 'max-w-md'}
|
||||
placeholder={`Default: ${SETTING_CONFIG_DEFAULT[field.key] ?? 'none'}`}
|
||||
class="w-full md:max-w-md"
|
||||
/>
|
||||
{#if field.help || SETTING_CONFIG_INFO[field.key]}
|
||||
<p class="mt-1 text-xs text-muted-foreground">
|
||||
@@ -47,10 +44,10 @@
|
||||
|
||||
<Textarea
|
||||
id={field.key}
|
||||
value={String(localConfig[field.key] || '')}
|
||||
value={String(localConfig[field.key] ?? '')}
|
||||
onchange={(e) => onConfigChange(field.key, e.currentTarget.value)}
|
||||
placeholder={`Default: ${SETTING_CONFIG_DEFAULT[field.key] || 'none'}`}
|
||||
class={isMobile ? 'min-h-[100px] w-full' : 'min-h-[100px] max-w-2xl'}
|
||||
placeholder={`Default: ${SETTING_CONFIG_DEFAULT[field.key] ?? 'none'}`}
|
||||
class="min-h-[100px] w-full md:max-w-2xl"
|
||||
/>
|
||||
{#if field.help || SETTING_CONFIG_INFO[field.key]}
|
||||
<p class="mt-1 text-xs text-muted-foreground">
|
||||
@@ -78,7 +75,7 @@
|
||||
}
|
||||
}}
|
||||
>
|
||||
<Select.Trigger class={isMobile ? 'w-full' : 'max-w-md'}>
|
||||
<Select.Trigger class="w-full md:w-auto md:max-w-md">
|
||||
<div class="flex items-center gap-2">
|
||||
{#if selectedOption?.icon}
|
||||
{@const IconComponent = selectedOption.icon}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
<script lang="ts">
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import * as AlertDialog from '$lib/components/ui/alert-dialog';
|
||||
|
||||
interface Props {
|
||||
onReset?: () => void;
|
||||
@@ -8,8 +9,15 @@
|
||||
|
||||
let { onReset, onSave }: Props = $props();
|
||||
|
||||
function handleReset() {
|
||||
let showResetDialog = $state(false);
|
||||
|
||||
function handleResetClick() {
|
||||
showResetDialog = true;
|
||||
}
|
||||
|
||||
function handleConfirmReset() {
|
||||
onReset?.();
|
||||
showResetDialog = false;
|
||||
}
|
||||
|
||||
function handleSave() {
|
||||
@@ -18,7 +26,23 @@
|
||||
</script>
|
||||
|
||||
<div class="flex justify-between border-t border-border/30 p-6">
|
||||
<Button variant="outline" onclick={handleReset}>Reset to default</Button>
|
||||
<Button variant="outline" onclick={handleResetClick}>Reset to default</Button>
|
||||
|
||||
<Button onclick={handleSave}>Save settings</Button>
|
||||
</div>
|
||||
|
||||
<AlertDialog.Root bind:open={showResetDialog}>
|
||||
<AlertDialog.Content>
|
||||
<AlertDialog.Header>
|
||||
<AlertDialog.Title>Reset Settings to Default</AlertDialog.Title>
|
||||
<AlertDialog.Description>
|
||||
Are you sure you want to reset all settings to their default values? This action cannot be
|
||||
undone and will permanently remove all your custom configurations.
|
||||
</AlertDialog.Description>
|
||||
</AlertDialog.Header>
|
||||
<AlertDialog.Footer>
|
||||
<AlertDialog.Cancel>Cancel</AlertDialog.Cancel>
|
||||
<AlertDialog.Action onclick={handleConfirmReset}>Reset to Default</AlertDialog.Action>
|
||||
</AlertDialog.Footer>
|
||||
</AlertDialog.Content>
|
||||
</AlertDialog.Root>
|
||||
|
||||
@@ -87,7 +87,7 @@
|
||||
<Sidebar.GroupContent>
|
||||
<Sidebar.Menu>
|
||||
{#each filteredConversations as conversation (conversation.id)}
|
||||
<Sidebar.MenuItem class="mb-1" onclick={handleMobileSidebarItemClick}>
|
||||
<Sidebar.MenuItem class="mb-1">
|
||||
<ChatSidebarConversationItem
|
||||
conversation={{
|
||||
id: conversation.id,
|
||||
@@ -95,6 +95,7 @@
|
||||
lastModified: conversation.lastModified,
|
||||
currNode: conversation.currNode
|
||||
}}
|
||||
{handleMobileSidebarItemClick}
|
||||
isActive={currentChatId === conversation.id}
|
||||
onSelect={selectConversation}
|
||||
onEdit={editConversation}
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
interface Props {
|
||||
isActive?: boolean;
|
||||
conversation: DatabaseConversation;
|
||||
handleMobileSidebarItemClick?: () => void;
|
||||
onDelete?: (id: string) => void;
|
||||
onEdit?: (id: string, name: string) => void;
|
||||
onSelect?: (id: string) => void;
|
||||
@@ -16,6 +17,7 @@
|
||||
|
||||
let {
|
||||
conversation,
|
||||
handleMobileSidebarItemClick,
|
||||
onDelete,
|
||||
onEdit,
|
||||
onSelect,
|
||||
@@ -47,6 +49,7 @@
|
||||
|
||||
function handleConfirmEdit() {
|
||||
if (!editedName.trim()) return;
|
||||
showEditDialog = false;
|
||||
onEdit?.(conversation.id, editedName);
|
||||
}
|
||||
|
||||
@@ -85,7 +88,12 @@
|
||||
: ''}"
|
||||
onclick={handleSelect}
|
||||
>
|
||||
<div class="text flex min-w-0 flex-1 items-center space-x-3">
|
||||
<!-- svelte-ignore a11y_click_events_have_key_events -->
|
||||
<!-- svelte-ignore a11y_no_static_element_interactions -->
|
||||
<div
|
||||
class="text flex min-w-0 flex-1 items-center space-x-3"
|
||||
onclick={handleMobileSidebarItemClick}
|
||||
>
|
||||
<div class="min-w-0 flex-1">
|
||||
<p class="truncate text-sm font-medium">{conversation.name}</p>
|
||||
|
||||
@@ -178,5 +186,10 @@
|
||||
&:is(:hover) :global([data-slot='dropdown-menu-trigger']) {
|
||||
opacity: 1;
|
||||
}
|
||||
@media (max-width: 768px) {
|
||||
:global([data-slot='dropdown-menu-trigger']) {
|
||||
opacity: 1 !important;
|
||||
}
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
<DropdownMenu.Root bind:open>
|
||||
<DropdownMenu.Trigger
|
||||
class="flex h-6 w-6 cursor-pointer items-center justify-center rounded-md p-0 text-sm font-medium transition-colors hover:bg-accent hover:text-accent-foreground focus:bg-accent focus:text-accent-foreground focus:outline-none disabled:pointer-events-none disabled:opacity-50 data-[state=open]:bg-accent data-[state=open]:text-accent-foreground {triggerClass}"
|
||||
onclick={(e) => e.stopPropagation()}
|
||||
>
|
||||
{#if triggerTooltip}
|
||||
<Tooltip.Root delayDuration={TOOLTIP_DELAY_DURATION}>
|
||||
@@ -53,7 +54,7 @@
|
||||
{/if}
|
||||
</DropdownMenu.Trigger>
|
||||
|
||||
<DropdownMenu.Content {align} class="z-999 w-48">
|
||||
<DropdownMenu.Content {align} class="z-[999999] w-48">
|
||||
{#each actions as action, index (action.label)}
|
||||
{#if action.separator && index > 0}
|
||||
<DropdownMenu.Separator />
|
||||
|
||||
@@ -19,7 +19,15 @@
|
||||
bind:ref
|
||||
data-slot="alert-dialog-content"
|
||||
class={cn(
|
||||
'fixed top-[50%] left-[50%] z-50 grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border bg-background p-6 shadow-lg duration-200 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 sm:max-w-lg',
|
||||
'fixed z-[999999] grid w-full gap-4 border bg-background p-6 shadow-lg duration-200',
|
||||
// Mobile: Bottom sheet behavior
|
||||
'right-0 bottom-0 left-0 max-h-[100dvh] translate-x-0 translate-y-0 overflow-y-auto rounded-t-lg',
|
||||
'data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:slide-out-to-bottom-full',
|
||||
'data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:slide-in-from-bottom-full',
|
||||
// Desktop: Centered dialog behavior
|
||||
'sm:top-[50%] sm:right-auto sm:bottom-auto sm:left-[50%] sm:max-h-[100vh] sm:max-w-lg sm:translate-x-[-50%] sm:translate-y-[-50%] sm:rounded-lg',
|
||||
'sm:data-[state=closed]:slide-out-to-bottom-0 sm:data-[state=closed]:zoom-out-95',
|
||||
'sm:data-[state=open]:slide-in-from-bottom-0 sm:data-[state=open]:zoom-in-95',
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
|
||||
@@ -13,7 +13,10 @@
|
||||
<div
|
||||
bind:this={ref}
|
||||
data-slot="alert-dialog-footer"
|
||||
class={cn('flex flex-col-reverse gap-2 sm:flex-row sm:justify-end', className)}
|
||||
class={cn(
|
||||
'mt-6 flex flex-row gap-2 sm:mt-0 sm:justify-end [&>*]:flex-1 sm:[&>*]:flex-none',
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
>
|
||||
{@render children?.()}
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
bind:ref
|
||||
data-slot="dialog-content"
|
||||
class={cn(
|
||||
'fixed top-[50%] left-[50%] z-50 grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border border-border/30 bg-background p-6 shadow-lg duration-200 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 sm:max-w-lg',
|
||||
`fixed top-[50%] left-[50%] z-50 grid max-h-[100dvh] w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 overflow-y-auto rounded-lg border border-border/30 bg-background p-6 shadow-lg duration-200 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 sm:max-w-lg md:max-h-[100vh]`,
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
<script lang="ts">
|
||||
import { onDestroy, onMount } from 'svelte';
|
||||
import { Select as SelectPrimitive } from 'bits-ui';
|
||||
import SelectScrollUpButton from './select-scroll-up-button.svelte';
|
||||
import SelectScrollDownButton from './select-scroll-down-button.svelte';
|
||||
@@ -14,6 +15,76 @@
|
||||
}: WithoutChild<SelectPrimitive.ContentProps> & {
|
||||
portalProps?: SelectPrimitive.PortalProps;
|
||||
} = $props();
|
||||
|
||||
let cleanupInternalListeners: (() => void) | undefined;
|
||||
|
||||
onMount(() => {
|
||||
const listenerOptions: AddEventListenerOptions = { passive: false };
|
||||
|
||||
const blockOutsideWheel = (event: WheelEvent) => {
|
||||
if (!ref) {
|
||||
return;
|
||||
}
|
||||
|
||||
const target = event.target as Node | null;
|
||||
|
||||
if (!target || !ref.contains(target)) {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
}
|
||||
};
|
||||
|
||||
const blockOutsideTouchMove = (event: TouchEvent) => {
|
||||
if (!ref) {
|
||||
return;
|
||||
}
|
||||
|
||||
const target = event.target as Node | null;
|
||||
|
||||
if (!target || !ref.contains(target)) {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
}
|
||||
};
|
||||
|
||||
document.addEventListener('wheel', blockOutsideWheel, listenerOptions);
|
||||
document.addEventListener('touchmove', blockOutsideTouchMove, listenerOptions);
|
||||
|
||||
return () => {
|
||||
document.removeEventListener('wheel', blockOutsideWheel, listenerOptions);
|
||||
document.removeEventListener('touchmove', blockOutsideTouchMove, listenerOptions);
|
||||
};
|
||||
});
|
||||
|
||||
$effect(() => {
|
||||
const element = ref;
|
||||
|
||||
cleanupInternalListeners?.();
|
||||
|
||||
if (!element) {
|
||||
return;
|
||||
}
|
||||
|
||||
const stopWheelPropagation = (event: WheelEvent) => {
|
||||
event.stopPropagation();
|
||||
};
|
||||
|
||||
const stopTouchPropagation = (event: TouchEvent) => {
|
||||
event.stopPropagation();
|
||||
};
|
||||
|
||||
element.addEventListener('wheel', stopWheelPropagation);
|
||||
element.addEventListener('touchmove', stopTouchPropagation);
|
||||
|
||||
cleanupInternalListeners = () => {
|
||||
element.removeEventListener('wheel', stopWheelPropagation);
|
||||
element.removeEventListener('touchmove', stopTouchPropagation);
|
||||
};
|
||||
});
|
||||
|
||||
onDestroy(() => {
|
||||
cleanupInternalListeners?.();
|
||||
});
|
||||
</script>
|
||||
|
||||
<SelectPrimitive.Portal {...portalProps}>
|
||||
@@ -22,7 +93,7 @@
|
||||
{sideOffset}
|
||||
data-slot="select-content"
|
||||
class={cn(
|
||||
'relative z-50 max-h-(--bits-select-content-available-height) min-w-[8rem] origin-(--bits-select-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border bg-popover text-popover-foreground shadow-md data-[side=bottom]:translate-y-1 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:-translate-x-1 data-[side=left]:slide-in-from-right-2 data-[side=right]:translate-x-1 data-[side=right]:slide-in-from-left-2 data-[side=top]:-translate-y-1 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95',
|
||||
'relative z-[var(--layer-popover,1000000)] max-h-(--bits-select-content-available-height) min-w-[8rem] origin-(--bits-select-content-transform-origin) overflow-x-hidden overflow-y-auto rounded-md border bg-popover text-popover-foreground shadow-md data-[side=bottom]:translate-y-1 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:-translate-x-1 data-[side=left]:slide-in-from-right-2 data-[side=right]:translate-x-1 data-[side=right]:slide-in-from-left-2 data-[side=top]:-translate-y-1 data-[side=top]:slide-in-from-bottom-2 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95',
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
/**
|
||||
* Parses thinking content from a message that may contain <think> tags
|
||||
* Parses thinking content from a message that may contain <think> tags or [THINK] tags
|
||||
* Returns an object with thinking content and cleaned message content
|
||||
* Handles both complete <think>...</think> blocks and incomplete <think> blocks (streaming)
|
||||
* Handles both complete blocks and incomplete blocks (streaming)
|
||||
* Supports formats: <think>...</think> and [THINK]...[/THINK]
|
||||
* @param content - The message content to parse
|
||||
* @returns An object containing the extracted thinking content and the cleaned message content
|
||||
*/
|
||||
@@ -9,12 +10,11 @@ export function parseThinkingContent(content: string): {
|
||||
thinking: string | null;
|
||||
cleanContent: string;
|
||||
} {
|
||||
const incompleteMatch = content.includes('<think>') && !content.includes('</think>');
|
||||
const incompleteThinkMatch = content.includes('<think>') && !content.includes('</think>');
|
||||
const incompleteThinkBracketMatch = content.includes('[THINK]') && !content.includes('[/THINK]');
|
||||
|
||||
if (incompleteMatch) {
|
||||
// Remove the entire <think>... part from clean content
|
||||
if (incompleteThinkMatch) {
|
||||
const cleanContent = content.split('</think>')?.[1]?.trim();
|
||||
// Extract everything after <think> as thinking content
|
||||
const thinkingContent = content.split('<think>')?.[1]?.trim();
|
||||
|
||||
return {
|
||||
@@ -23,12 +23,40 @@ export function parseThinkingContent(content: string): {
|
||||
};
|
||||
}
|
||||
|
||||
const completeMatch = content.includes('</think>');
|
||||
if (incompleteThinkBracketMatch) {
|
||||
const cleanContent = content.split('[/THINK]')?.[1]?.trim();
|
||||
const thinkingContent = content.split('[THINK]')?.[1]?.trim();
|
||||
|
||||
if (completeMatch) {
|
||||
return {
|
||||
thinking: content.split('</think>')?.[0]?.trim(),
|
||||
cleanContent: content.split('</think>')?.[1]?.trim()
|
||||
cleanContent,
|
||||
thinking: thinkingContent
|
||||
};
|
||||
}
|
||||
|
||||
const completeThinkMatch = content.match(/<think>([\s\S]*?)<\/think>/);
|
||||
const completeThinkBracketMatch = content.match(/\[THINK\]([\s\S]*?)\[\/THINK\]/);
|
||||
|
||||
if (completeThinkMatch) {
|
||||
const thinkingContent = completeThinkMatch[1]?.trim() ?? '';
|
||||
const cleanContent = `${content.slice(0, completeThinkMatch.index ?? 0)}${content.slice(
|
||||
(completeThinkMatch.index ?? 0) + completeThinkMatch[0].length
|
||||
)}`.trim();
|
||||
|
||||
return {
|
||||
thinking: thinkingContent,
|
||||
cleanContent
|
||||
};
|
||||
}
|
||||
|
||||
if (completeThinkBracketMatch) {
|
||||
const thinkingContent = completeThinkBracketMatch[1]?.trim() ?? '';
|
||||
const cleanContent = `${content.slice(0, completeThinkBracketMatch.index ?? 0)}${content.slice(
|
||||
(completeThinkBracketMatch.index ?? 0) + completeThinkBracketMatch[0].length
|
||||
)}`.trim();
|
||||
|
||||
return {
|
||||
thinking: thinkingContent,
|
||||
cleanContent
|
||||
};
|
||||
}
|
||||
|
||||
@@ -39,26 +67,33 @@ export function parseThinkingContent(content: string): {
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if content contains an opening <think> tag (for streaming)
|
||||
* Checks if content contains an opening thinking tag (for streaming)
|
||||
* Supports both <think> and [THINK] formats
|
||||
* @param content - The message content to check
|
||||
* @returns True if the content contains an opening <think> tag
|
||||
* @returns True if the content contains an opening thinking tag
|
||||
*/
|
||||
export function hasThinkingStart(content: string): boolean {
|
||||
return content.includes('<think>') || content.includes('<|channel|>analysis');
|
||||
return (
|
||||
content.includes('<think>') ||
|
||||
content.includes('[THINK]') ||
|
||||
content.includes('<|channel|>analysis')
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if content contains a closing </think> tag (for streaming)
|
||||
* Checks if content contains a closing thinking tag (for streaming)
|
||||
* Supports both </think> and [/THINK] formats
|
||||
* @param content - The message content to check
|
||||
* @returns True if the content contains a closing </think> tag
|
||||
* @returns True if the content contains a closing thinking tag
|
||||
*/
|
||||
export function hasThinkingEnd(content: string): boolean {
|
||||
return content.includes('</think>');
|
||||
return content.includes('</think>') || content.includes('[/THINK]');
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts partial thinking content during streaming
|
||||
* Used when we have <think> but not yet </think>
|
||||
* Supports both <think> and [THINK] formats
|
||||
* Used when we have opening tag but not yet closing tag
|
||||
* @param content - The message content to extract partial thinking from
|
||||
* @returns An object containing the extracted partial thinking content and the remaining content
|
||||
*/
|
||||
@@ -66,23 +101,41 @@ export function extractPartialThinking(content: string): {
|
||||
thinking: string | null;
|
||||
remainingContent: string;
|
||||
} {
|
||||
const startIndex = content.indexOf('<think>');
|
||||
if (startIndex === -1) {
|
||||
const thinkStartIndex = content.indexOf('<think>');
|
||||
const thinkEndIndex = content.indexOf('</think>');
|
||||
|
||||
const bracketStartIndex = content.indexOf('[THINK]');
|
||||
const bracketEndIndex = content.indexOf('[/THINK]');
|
||||
|
||||
const useThinkFormat =
|
||||
thinkStartIndex !== -1 && (bracketStartIndex === -1 || thinkStartIndex < bracketStartIndex);
|
||||
const useBracketFormat =
|
||||
bracketStartIndex !== -1 && (thinkStartIndex === -1 || bracketStartIndex < thinkStartIndex);
|
||||
|
||||
if (useThinkFormat) {
|
||||
if (thinkEndIndex === -1) {
|
||||
const thinkingStart = thinkStartIndex + '<think>'.length;
|
||||
|
||||
return {
|
||||
thinking: content.substring(thinkingStart),
|
||||
remainingContent: content.substring(0, thinkStartIndex)
|
||||
};
|
||||
}
|
||||
} else if (useBracketFormat) {
|
||||
if (bracketEndIndex === -1) {
|
||||
const thinkingStart = bracketStartIndex + '[THINK]'.length;
|
||||
|
||||
return {
|
||||
thinking: content.substring(thinkingStart),
|
||||
remainingContent: content.substring(0, bracketStartIndex)
|
||||
};
|
||||
}
|
||||
} else {
|
||||
return { thinking: null, remainingContent: content };
|
||||
}
|
||||
|
||||
const endIndex = content.indexOf('</think>');
|
||||
if (endIndex === -1) {
|
||||
// Still streaming thinking content
|
||||
const thinkingStart = startIndex + '<think>'.length;
|
||||
return {
|
||||
thinking: content.substring(thinkingStart),
|
||||
remainingContent: content.substring(0, startIndex)
|
||||
};
|
||||
}
|
||||
|
||||
// Complete thinking block found
|
||||
const parsed = parseThinkingContent(content);
|
||||
|
||||
return {
|
||||
thinking: parsed.thinking,
|
||||
remainingContent: parsed.cleanContent
|
||||
|
||||
@@ -59,6 +59,60 @@
|
||||
thinking: '',
|
||||
children: []
|
||||
});
|
||||
|
||||
// Message with <think> format thinking content
|
||||
const thinkTagMessage: DatabaseMessage = {
|
||||
id: '6',
|
||||
convId: 'conv-1',
|
||||
type: 'message',
|
||||
timestamp: Date.now() - 1000 * 60 * 2,
|
||||
role: 'assistant',
|
||||
content:
|
||||
"<think>\nLet me analyze this step by step:\n\n1. The user is asking about thinking formats\n2. I need to demonstrate the <think> tag format\n3. This content should be displayed in the thinking section\n4. The main response should be separate\n\nThis is a good example of reasoning content.\n</think>\n\nHere's my response after thinking through the problem. The thinking content above should be displayed separately from this main response content.",
|
||||
parent: '1',
|
||||
thinking: '',
|
||||
children: []
|
||||
};
|
||||
|
||||
// Message with [THINK] format thinking content
|
||||
const thinkBracketMessage: DatabaseMessage = {
|
||||
id: '7',
|
||||
convId: 'conv-1',
|
||||
type: 'message',
|
||||
timestamp: Date.now() - 1000 * 60 * 1,
|
||||
role: 'assistant',
|
||||
content:
|
||||
'[THINK]\nThis is the DeepSeek-style thinking format:\n\n- Using square brackets instead of angle brackets\n- Should work identically to the <think> format\n- Content parsing should extract this reasoning\n- Display should be the same as <think> format\n\nBoth formats should be supported seamlessly.\n[/THINK]\n\nThis is the main response content that comes after the [THINK] block. The reasoning above should be parsed and displayed in the thinking section.',
|
||||
parent: '1',
|
||||
thinking: '',
|
||||
children: []
|
||||
};
|
||||
|
||||
// Streaming message for <think> format
|
||||
let streamingThinkMessage = $state({
|
||||
id: '8',
|
||||
convId: 'conv-1',
|
||||
type: 'message',
|
||||
timestamp: 0, // No timestamp = streaming
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
parent: '1',
|
||||
thinking: '',
|
||||
children: []
|
||||
});
|
||||
|
||||
// Streaming message for [THINK] format
|
||||
let streamingBracketMessage = $state({
|
||||
id: '9',
|
||||
convId: 'conv-1',
|
||||
type: 'message',
|
||||
timestamp: 0, // No timestamp = streaming
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
parent: '1',
|
||||
thinking: '',
|
||||
children: []
|
||||
});
|
||||
</script>
|
||||
|
||||
<Story
|
||||
@@ -144,3 +198,115 @@
|
||||
await new Promise(resolve => setTimeout(resolve, 100));
|
||||
}}
|
||||
/>
|
||||
|
||||
<Story
|
||||
name="ThinkTagFormat"
|
||||
args={{
|
||||
class: 'max-w-[56rem] w-[calc(100vw-2rem)]',
|
||||
message: thinkTagMessage
|
||||
}}
|
||||
/>
|
||||
|
||||
<Story
|
||||
name="ThinkBracketFormat"
|
||||
args={{
|
||||
class: 'max-w-[56rem] w-[calc(100vw-2rem)]',
|
||||
message: thinkBracketMessage
|
||||
}}
|
||||
/>
|
||||
|
||||
<Story
|
||||
name="StreamingThinkTag"
|
||||
args={{
|
||||
message: streamingThinkMessage
|
||||
}}
|
||||
parameters={{
|
||||
test: {
|
||||
timeout: 30000
|
||||
}
|
||||
}}
|
||||
asChild
|
||||
play={async () => {
|
||||
// Phase 1: Stream <think> reasoning content
|
||||
const thinkingContent =
|
||||
'Let me work through this problem systematically:\n\n1. First, I need to understand what the user is asking\n2. Then I should consider different approaches\n3. I need to evaluate the pros and cons\n4. Finally, I should provide a clear recommendation\n\nThis step-by-step approach will ensure accuracy.';
|
||||
|
||||
let currentContent = '<think>\n';
|
||||
streamingThinkMessage.content = currentContent;
|
||||
|
||||
for (let i = 0; i < thinkingContent.length; i++) {
|
||||
currentContent += thinkingContent[i];
|
||||
streamingThinkMessage.content = currentContent;
|
||||
await new Promise((resolve) => setTimeout(resolve, 5));
|
||||
}
|
||||
|
||||
// Close the thinking block
|
||||
currentContent += '\n</think>\n\n';
|
||||
streamingThinkMessage.content = currentContent;
|
||||
await new Promise((resolve) => setTimeout(resolve, 200));
|
||||
|
||||
// Phase 2: Stream main response content
|
||||
const responseContent =
|
||||
"Based on my analysis above, here's the solution:\n\n**Key Points:**\n- The approach should be systematic\n- We need to consider all factors\n- Implementation should be step-by-step\n\nThis ensures the best possible outcome.";
|
||||
|
||||
for (let i = 0; i < responseContent.length; i++) {
|
||||
currentContent += responseContent[i];
|
||||
streamingThinkMessage.content = currentContent;
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
}
|
||||
|
||||
streamingThinkMessage.timestamp = Date.now();
|
||||
}}
|
||||
>
|
||||
<div class="w-[56rem]">
|
||||
<ChatMessage message={streamingThinkMessage} />
|
||||
</div>
|
||||
</Story>
|
||||
|
||||
<Story
|
||||
name="StreamingThinkBracket"
|
||||
args={{
|
||||
message: streamingBracketMessage
|
||||
}}
|
||||
parameters={{
|
||||
test: {
|
||||
timeout: 30000
|
||||
}
|
||||
}}
|
||||
asChild
|
||||
play={async () => {
|
||||
// Phase 1: Stream [THINK] reasoning content
|
||||
const thinkingContent =
|
||||
'Using the DeepSeek format now:\n\n- This demonstrates the [THINK] bracket format\n- Should parse identically to <think> tags\n- The UI should display this in the thinking section\n- Main content should be separate\n\nBoth formats provide the same functionality.';
|
||||
|
||||
let currentContent = '[THINK]\n';
|
||||
streamingBracketMessage.content = currentContent;
|
||||
|
||||
for (let i = 0; i < thinkingContent.length; i++) {
|
||||
currentContent += thinkingContent[i];
|
||||
streamingBracketMessage.content = currentContent;
|
||||
await new Promise((resolve) => setTimeout(resolve, 5));
|
||||
}
|
||||
|
||||
// Close the thinking block
|
||||
currentContent += '\n[/THINK]\n\n';
|
||||
streamingBracketMessage.content = currentContent;
|
||||
await new Promise((resolve) => setTimeout(resolve, 200));
|
||||
|
||||
// Phase 2: Stream main response content
|
||||
const responseContent =
|
||||
"Here's my response after using the [THINK] format:\n\n**Observations:**\n- Both <think> and [THINK] formats work seamlessly\n- The parsing logic handles both cases\n- UI display is consistent across formats\n\nThis demonstrates the enhanced thinking content support.";
|
||||
|
||||
for (let i = 0; i < responseContent.length; i++) {
|
||||
currentContent += responseContent[i];
|
||||
streamingBracketMessage.content = currentContent;
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
}
|
||||
|
||||
streamingBracketMessage.timestamp = Date.now();
|
||||
}}
|
||||
>
|
||||
<div class="w-[56rem]">
|
||||
<ChatMessage message={streamingBracketMessage} />
|
||||
</div>
|
||||
</Story>
|
||||
|
||||
Reference in New Issue
Block a user