Compare commits

...

12 Commits
b7035 ... b7047

Author SHA1 Message Date
Georgi Gerganov
2776db6c81 Revert "ggml-cpu: handle 3d tensors in repack mat_mul (#17030)" (#17233)
This reverts commit 1c398dc9ec.
2025-11-13 12:59:37 +02:00
Diego Devesa
879dec341a ggml-cpu : use template for argsort (#17222) 2025-11-13 10:59:05 +02:00
TecJesh
97d5117217 CANN: Add cross_entropy_loss op support (#16886)
* update L2_NORM op support

* update L2_NORM op support

* remove extra whitespace

* cann: update cross_entropy_loss op support

* remove trailing whitespaces

* rebase the latest code in the main repository and remove the l2_norm operator that already exists in another pull request.

* undo the l2_norm operator deletion
2025-11-13 09:39:51 +08:00
Aman Gupta
a90eb94ca9 CUDA: fuse rope + set_rows (#16884)
* CUDA: add fused rope

* move k forward_expand up

* create helper function instead of re-using params

* make assert statement more in line with comment

* rope_norm: coalesced writes to global mem
2025-11-13 08:50:01 +08:00
Neo Zhang Jianyu
07751f8d44 update SYCL support OPs (#17208)
Co-authored-by: Zhang Jianyu <zhang.jianyu@outlook.com>
2025-11-13 08:42:23 +08:00
o7si
ffb6f3d921 vocab : correct bounds check for UGM XCDA array access (#17215) 2025-11-12 23:41:02 +01:00
Johannes Gäßler
5d6838b74f CUDA: static assert to prevent misuse of memcpy_1 (#17198) 2025-11-12 23:13:55 +01:00
Mike Abbott
92bb442ad9 docker : preserve .so symlinks for docker container builds (#17214) 2025-11-12 20:33:55 +01:00
Georgi Gerganov
374fe09cdd ggml : use std::sort in ggml_argsort CPU implementation (#17211)
* ggml : use std::sort in ggml_argsort CPU implementation

* cont : add missing header
2025-11-12 20:43:38 +02:00
Aleksander Grygier
8e878f0cb4 Update packages + upgrade Storybook to v10 (#17201)
* chore: Update packages + upgrade Storybook to v10

* fix: Increase timeout for UI tests
2025-11-12 19:01:48 +01:00
Xuan-Son Nguyen
00c94083b3 server: (refactor) implement generator-based API for task results (#17174)
* server: (refactor) implement generator-based API for task results

* improve

* moving some code

* fix "Response ended prematurely"

* add sink.done before return false

* rm redundant check

* rm unused var

* rename generator --> reader
2025-11-12 18:50:52 +01:00
Xuan-Son Nguyen
017eceed61 ci: add check vendor job (#17179)
* ci: add check vendor job

* use dev version of miniaudio

* move to dedicated workflow, only run on related files changed
2025-11-12 14:56:02 +01:00
33 changed files with 3376 additions and 3028 deletions

View File

@@ -49,7 +49,7 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
# -- Organize build artifacts for copying in later stages --
# Create a lib directory to store all .so files
RUN mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;
find build -name "*.so*" -exec cp -P {} /app/lib \;
# Create a full directory to store all executables and Python scripts
RUN mkdir -p /app/full && \

View File

@@ -20,7 +20,7 @@ RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
cmake --build build -j $(nproc)
RUN mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;
find build -name "*.so*" -exec cp -P {} /app/lib \;
RUN mkdir -p /app/full \
&& cp build/bin/* /app/full \

View File

@@ -25,7 +25,7 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
cmake --build build --config Release -j$(nproc)
RUN mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;
find build -name "*.so*" -exec cp -P {} /app/lib \;
RUN mkdir -p /app/full \
&& cp build/bin/* /app/full \

View File

@@ -21,7 +21,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
cmake --build build --config Release -j$(nproc)
RUN mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;
find build -name "*.so*" -exec cp -P {} /app/lib \;
RUN mkdir -p /app/full \
&& cp build/bin/* /app/full \

View File

@@ -32,7 +32,7 @@ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
cmake --build build --config Release -j$(nproc)
RUN mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;
find build -name "*.so*" -exec cp -P {} /app/lib \;
RUN mkdir -p /app/full \
&& cp build/bin/* /app/full \

View File

@@ -45,7 +45,7 @@ RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
&& cmake --build build --config Release -j$(nproc)
RUN mkdir -p /app/lib \
&& find build -name "*.so" -exec cp {} /app/lib \;
&& find build -name "*.so*" -exec cp -P {} /app/lib \;
RUN mkdir -p /app/full \
&& cp build/bin/* /app/full \

View File

@@ -20,7 +20,7 @@ RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=ON -DLLAMA_BUILD_TESTS=OFF -D
cmake --build build --config Release -j$(nproc)
RUN mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;
find build -name "*.so*" -exec cp -P {} /app/lib \;
RUN mkdir -p /app/full \
&& cp build/bin/* /app/full \

52
.github/workflows/check-vendor.yml vendored Normal file
View File

@@ -0,0 +1,52 @@
# CI workflow: re-runs scripts/sync_vendor.py and fails when the working tree
# differs afterwards, i.e. when the committed vendor files do not match what
# the sync script produces.
name: Check vendor
on:
workflow_dispatch: # allows manual triggering
# pushes to master are checked only when vendored files or the sync script change
push:
branches:
- master
paths: [
'vendor/**',
'scripts/sync_vendor.py'
]
# pull requests use the same path filter as pushes
pull_request:
types: [opened, synchronize, reopened]
paths: [
'vendor/**',
'scripts/sync_vendor.py'
]
jobs:
# single job: check out, run the sync script, then inspect `git status`
check-vendor:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
# regenerate the vendored files in place; any resulting diff is reported by the next step
- name: Run vendor sync
run: |
set -euo pipefail
python3 scripts/sync_vendor.py
- name: Check for changes
run: |
set -euo pipefail
# detect modified or untracked files
changed=$(git status --porcelain --untracked-files=all || true)
if [ -n "$changed" ]; then
echo "Vendor sync modified files:"
echo "$changed" | awk '{ print $2 }' | sed '/^$/d'
echo "Failing because vendor files mismatch. Please update scripts/sync_vendor.py"
exit 1
else
echo "Vendor files are up-to-date."
fi

View File

@@ -209,7 +209,7 @@ jobs:
working-directory: tools/server/webui
- name: Run UI tests
run: npm run test:ui
run: npm run test:ui -- --testTimeout=60000
working-directory: tools/server/webui
- name: Run E2E tests

View File

@@ -19,10 +19,10 @@ Legend:
| ADD | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
| ADD1 | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ |
| ADD_ID | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
| ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | | ❌ | ❌ |
| ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | | ❌ | ❌ |
| ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
| ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | | ❌ | ❌ |
| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
| CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ |
| CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ❌ |
| CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ |
@@ -42,7 +42,7 @@ Legend:
| ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
| EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
| FLASH_ATTN_EXT | ❌ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ |
| FLOOR | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | | ❌ | ❌ |
| FLOOR | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
| GATED_LINEAR_ATTN | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ |
| GEGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
| GEGLU_ERF | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
@@ -61,7 +61,7 @@ Legend:
| L2_NORM | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
| LEAKY_RELU | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
| LOG | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ |
| MEAN | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | | ❌ | ❌ |
| MEAN | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | | ❌ | ❌ |
| MUL | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
| MUL_MAT | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 |
| MUL_MAT_ID | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ |
@@ -77,18 +77,18 @@ Legend:
| REGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
| RELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
| REPEAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | 🟡 | ❌ |
| REPEAT_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | | ✅ | ❌ |
| REPEAT_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | | ✅ | ❌ |
| RMS_NORM | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ |
| RMS_NORM_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ |
| RMS_NORM_MUL_ADD | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
| ROLL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | | ✅ | ❌ |
| ROLL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | | ✅ | ❌ |
| ROPE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
| ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
| ROUND | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | | ❌ | ❌ |
| ROUND | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
| RWKV_WKV6 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
| RWKV_WKV7 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
| SCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
| SET | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | | ❌ | ❌ |
| SET | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | 🟡 | ❌ | ❌ |
| SET_ROWS | ❌ | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
| SGN | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
| SIGMOID | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
@@ -100,17 +100,17 @@ Legend:
| SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ |
| SQR | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ |
| SQRT | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | ❌ | ❌ |
| SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | | ✅ | ❌ |
| SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | | ✅ | ❌ |
| SSM_SCAN | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ |
| STEP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
| SUB | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
| SUM | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | | ✅ | ❌ |
| SUM | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | 🟡 | ✅ | ❌ |
| SUM_ROWS | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ |
| SWIGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
| SWIGLU_OAI | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
| TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ |
| TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
| TOPK_MOE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |
| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | | ❌ | ❌ |
| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
| UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ |
| XIELU | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |

File diff suppressed because it is too large Load Diff

View File

@@ -477,6 +477,92 @@ void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
ggml_cann_release_resources(ctx, dims_array, p_scalar, acl_src, acl_dst, acl_div);
}
/**
 * Cross entropy loss between the logits in dst->src[0] and the target
 * distributions in dst->src[1]; the single float result is written to
 * dst->data.
 *
 * The work is issued as a chain of CANN operators:
 *   LogSoftmax(logits) -> Mul with the labels -> ReduceSum per row ->
 *   ReduceSum over all rows -> Muls by -1/nr.
 */
void ggml_cann_cross_entropy_loss(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
    ggml_tensor * logits = dst->src[0];
    ggml_tensor * labels = dst->src[1];

    const int64_t n_classes = logits->ne[0];
    const int64_t n_rows    = ggml_nrows(logits);
    const size_t  f32_size  = sizeof(float);

    // Every 2D tensor in this function is described with the extents {nc, nr}.
    int64_t ne_2d[] = { n_classes, n_rows };

    // Packed float strides and byte size used by both 2D scratch buffers.
    size_t scratch_nb[2];
    scratch_nb[0] = f32_size;
    scratch_nb[1] = scratch_nb[0] * ne_2d[0];
    const int64_t scratch_bytes = n_rows * n_classes * f32_size;

    // Stride shared by every 1D float tensor (per-row sums, total, dst).
    size_t vec_nb[] = { f32_size };

    // --- step 1: log_softmax over the logits --------------------------------
    // Input strides are derived from the element size of the source type.
    size_t logits_nb[2];
    logits_nb[0] = ggml_type_size(logits->type);
    logits_nb[1] = logits_nb[0] * ne_2d[0];
    aclTensor * acl_logits = ggml_cann_create_tensor(logits->data, ACL_FLOAT, sizeof(float), ne_2d, logits_nb, 2);

    ggml_cann_pool_alloc log_softmax_pool(ctx.pool(), scratch_bytes);
    aclTensor * acl_log_softmax =
        ggml_cann_create_tensor(log_softmax_pool.get(), ACL_FLOAT, f32_size, ne_2d, scratch_nb, 2);

    GGML_CANN_CALL_ACLNN_OP(ctx, LogSoftmax, acl_logits, 1, acl_log_softmax);

    // --- step 2: element-wise product with the target distributions ---------
    size_t labels_nb[2];
    labels_nb[0] = ggml_type_size(labels->type);
    labels_nb[1] = labels_nb[0] * ne_2d[0];
    aclTensor * acl_labels = ggml_cann_create_tensor(labels->data, ACL_FLOAT, sizeof(float), ne_2d, labels_nb, 2);

    ggml_cann_pool_alloc product_pool(ctx.pool(), scratch_bytes);
    aclTensor * acl_product =
        ggml_cann_create_tensor(product_pool.get(), ACL_FLOAT, f32_size, ne_2d, scratch_nb, 2);

    GGML_CANN_CALL_ACLNN_OP(ctx, Mul, acl_log_softmax, acl_labels, acl_product);

    // --- step 3: reduce each row to one value (nr floats) -------------------
    const int64_t row_sum_bytes = n_rows * f32_size;
    ggml_cann_pool_alloc row_sum_pool(ctx.pool(), row_sum_bytes);
    int64_t     row_sum_ne[] = { n_rows };
    aclTensor * acl_row_sums =
        ggml_cann_create_tensor(row_sum_pool.get(), ACL_FLOAT, f32_size, row_sum_ne, vec_nb, 1);

    int64_t       per_row_axis[] = { 1 };
    aclIntArray * per_row_dims   = aclCreateIntArray(per_row_axis, 1);
    bool          keep_dims      = false;
    GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_product, per_row_dims, keep_dims, ACL_FLOAT, acl_row_sums);

    // --- step 4: reduce the row sums to a single float ----------------------
    const int64_t total_bytes = 1 * f32_size;
    ggml_cann_pool_alloc total_pool(ctx.pool(), total_bytes);
    int64_t     scalar_ne[] = { 1 };
    aclTensor * acl_total   = ggml_cann_create_tensor(total_pool.get(), ACL_FLOAT, f32_size, scalar_ne, vec_nb, 1);

    int64_t       all_rows_axis[] = { 0 };
    aclIntArray * all_rows_dims   = aclCreateIntArray(all_rows_axis, 1);
    GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_row_sums, all_rows_dims, keep_dims, ACL_FLOAT, acl_total);

    // --- step 5: scale by -1/nr straight into dst ---------------------------
    float       neg_inv_rows = -1.0f / static_cast<float>(n_rows);
    aclScalar * scale_factor = aclCreateScalar(&neg_inv_rows, aclDataType::ACL_FLOAT);
    aclTensor * acl_dst      = ggml_cann_create_tensor(dst->data, ACL_FLOAT, sizeof(float), scalar_ne, vec_nb, 1);
    GGML_CANN_CALL_ACLNN_OP(ctx, Muls, acl_total, scale_factor, acl_dst);

    ggml_cann_release_resources(ctx, acl_logits, acl_log_softmax, acl_labels, acl_product, acl_row_sums, acl_total,
                                acl_dst, scale_factor, per_row_dims, all_rows_dims);
}
void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
ggml_tensor * src = dst->src[0];

View File

@@ -47,6 +47,7 @@
#include <aclnnop/aclnn_log.h>
#include <aclnnop/aclnn_sign.h>
#include <aclnnop/aclnn_norm.h>
#include <aclnnop/aclnn_logsoftmax.h>
#include "acl_tensor.h"
#include "common.h"
@@ -211,6 +212,43 @@ void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
*/
void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
/**
* @brief Computes the Cross Entropy Loss for a ggml tensor using the CANN
* backend.
*
* @details This function computes the cross entropy loss between the predicted
* logits and target probability distributions. The operation follows
* the same computation pattern as the CPU implementation:
* 1. Applies log_softmax to the logits along the class dimension
* 2. Element-wise multiplication with target distributions
* 3. Summation along the class dimension to get per-sample losses
* 4. Global summation and scaling by -1/nr to get final loss
*
* The computation can be expressed as:
* \f[
* \text{loss} = -\frac{1}{N} \sum_{i=1}^{N} \sum_{j=1}^{C} y_{ij} \cdot \log(\text{softmax}(x_{ij}))
* \f]
* where \f$N\f$ is the total number of samples, \f$C\f$ is the number
* of classes, \f$x\f$ are the logits, and \f$y\f$ are the target
* probability distributions.
*
* @param ctx The CANN context used for operations.
* @param dst The destination tensor where the computed loss will be stored.
* This should be a scalar tensor containing the final loss value.
*
* @note This implementation computes cross entropy between probability
* distributions, not the typical classification cross entropy that
* expects class indices as targets. Both input tensors (src0 and src1)
* should have the same shape and represent probability distributions
* over the class dimension.
* @note The function expects two source tensors:
* - dst->src[0]: Logits tensor (before softmax)
* - dst->src[1]: Target probability distributions tensor
* @note The computation is performed using CANN backend operators including
* LogSoftmax, Mul, ReduceSum, and Muls for the final scaling.
* @note NOTE(review): the implementation describes both sources to CANN as
* ACL_FLOAT with a row stride of (element size * ne[0]); presumably only
* contiguous F32 inputs are valid - confirm against the backend's
* supports_op check.
*/
void ggml_cann_cross_entropy_loss(ggml_backend_cann_context & ctx, ggml_tensor * dst);
/**
* @brief Computes the Group Normalization for a ggml tensor using the CANN
* backend.

View File

@@ -1780,6 +1780,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
case GGML_OP_L2_NORM:
ggml_cann_l2_norm(ctx, dst);
break;
case GGML_OP_CROSS_ENTROPY_LOSS:
ggml_cann_cross_entropy_loss(ctx, dst);
break;
case GGML_OP_CONCAT:
ggml_cann_concat(ctx, dst);
break;
@@ -2519,6 +2522,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
}
case GGML_OP_L2_NORM:
case GGML_OP_CROSS_ENTROPY_LOSS:
case GGML_OP_DUP:
case GGML_OP_SUM:
case GGML_OP_IM2COL:

View File

@@ -7,8 +7,9 @@
#include "unary-ops.h"
#include "vec.h"
#include <float.h>
#include <cfloat>
#include <algorithm>
#include <functional>
// ggml_compute_forward_dup
@@ -7664,6 +7665,18 @@ void ggml_compute_forward_timestep_embedding(
// ggml_compute_forward_argsort
// Index comparator for argsort: decides the relative order of two positions
// by looking up their float values in `data`. The direction is fixed at
// compile time through the `order` template argument (ascending for
// GGML_SORT_ORDER_ASC, descending for any other value).
// NOTE(review): comparisons involving NaN are always false, so this is not a
// strict weak ordering if `data` can contain NaN - confirm inputs are NaN-free
// before relying on it with std::sort.
template<enum ggml_sort_order order>
struct argsort_cmp {
    const float * data;

    bool operator()(int32_t a, int32_t b) const {
        const float lhs = data[a];
        const float rhs = data[b];
        return order == GGML_SORT_ORDER_ASC ? (lhs < rhs) : (lhs > rhs);
    }
};
static void ggml_compute_forward_argsort_f32(
const ggml_compute_params * params,
ggml_tensor * dst) {
@@ -7682,23 +7695,25 @@ static void ggml_compute_forward_argsort_f32(
ggml_sort_order order = (ggml_sort_order) ggml_get_op_params_i32(dst, 0);
for (int64_t i = ith; i < nr; i += nth) {
int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
const float * src_data = (float *)((char *) src0->data + i*nb01);
int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
for (int64_t j = 0; j < ne0; j++) {
dst_data[j] = j;
}
// C doesn't have a functional sort, so we do a bubble sort instead
for (int64_t j = 0; j < ne0; j++) {
for (int64_t k = j + 1; k < ne0; k++) {
if ((order == GGML_SORT_ORDER_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
(order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
int32_t tmp = dst_data[j];
dst_data[j] = dst_data[k];
dst_data[k] = tmp;
}
}
switch (order) {
case GGML_SORT_ORDER_ASC:
std::sort(dst_data, dst_data + ne0, argsort_cmp<GGML_SORT_ORDER_ASC>{src_data});
break;
case GGML_SORT_ORDER_DESC:
std::sort(dst_data, dst_data + ne0, argsort_cmp<GGML_SORT_ORDER_DESC>{src_data});
break;
default:
GGML_ABORT("invalid sort order");
}
}
}

View File

@@ -1600,52 +1600,29 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
return false;
}
void forward_mul_mat_one_chunk(ggml_compute_params * params,
ggml_tensor * op,
int64_t src0_start,
int64_t src0_end,
int64_t src1_start,
int64_t src1_end) {
void forward_mul_mat_one_chunk(ggml_compute_params * params, ggml_tensor * op, int64_t src0_start, int64_t src0_end) {
const ggml_tensor * src0 = op->src[0];
const ggml_tensor * src1 = op->src[1];
ggml_tensor * dst = op;
GGML_TENSOR_BINARY_OP_LOCALS
const void * src1_wdata = params->wdata;
const size_t src1_col_stride = ggml_row_size(PARAM_TYPE, ne10);
GGML_ASSERT(ne03 == 1 && ne13 == 1);
GGML_ASSERT(ne12 % ne02 == 0);
const int64_t r2 = ne12 / ne02;
const int64_t i12 = src1_start / ne1;
const int64_t i11 = src1_start - i12 * ne1;
// Determine batch index
const int64_t i02 = i12 / r2;
const int64_t i1 = i11;
const int64_t i2 = i12;
const char * src0_ptr = (const char *) src0->data + i02 * nb02;
const char * src1_ptr = (const char *) params->wdata + (i11 + i12 * ne11) * src1_col_stride;
char * dst_ptr = ((char *) dst->data + (i1 * nb1 + i2 * nb2));
const int64_t nrows = src1_end - src1_start;
const int64_t ncols = src0_end - src0_start;
GGML_ASSERT(src1_ptr + src1_col_stride * nrows <= (const char *) params->wdata + params->wsize);
// If there are more than three rows in src1, use gemm; otherwise, use gemv.
if (nrows > 3) {
gemm<BLOC_TYPE, INTER_SIZE, NB_COLS, PARAM_TYPE>(ne00, (float *) (dst_ptr) + src0_start, nb1 / nb0,
src0_ptr + src0_start * nb01, src1_ptr,
nrows - (nrows % 4), ncols);
if (ne11 > 3) {
gemm<BLOC_TYPE, INTER_SIZE, NB_COLS, PARAM_TYPE>(ne00,
(float *) ((char *) dst->data) + src0_start, ne01,
(const char *) src0->data + src0_start * nb01,
(const char *) src1_wdata, ne11 - ne11 % 4, src0_end - src0_start);
}
for (int iter = nrows - (nrows % 4); iter < nrows; iter++) {
gemv<BLOC_TYPE, INTER_SIZE, NB_COLS, PARAM_TYPE>(ne00, (float *) (dst_ptr + (iter * nb1)) + src0_start,
ne01, src0_ptr + src0_start * nb01,
src1_ptr + (src1_col_stride * iter), 1 /* nrows */, ncols);
for (int iter = ne11 - ne11 % 4; iter < ne11; iter++) {
gemv<BLOC_TYPE, INTER_SIZE, NB_COLS, PARAM_TYPE>(ne00,
(float *) ((char *) dst->data + (iter * nb1)) + src0_start, ne01,
(const char *) src0->data + src0_start * nb01,
(const char *) src1_wdata + (src1_col_stride * iter), 1,
src0_end - src0_start);
}
}
@@ -1670,12 +1647,6 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
GGML_ASSERT(nb1 <= nb2);
GGML_ASSERT(nb2 <= nb3);
// TODO: General batched mul mat for 4D tensors
// Currently only supports 3D tensors
GGML_ASSERT(ne03 == 1);
GGML_ASSERT(ne13 == 1);
GGML_ASSERT(ne3 == 1);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT(ggml_n_dims(op->src[0]) == 2);
@@ -1683,60 +1654,47 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
char * wdata = static_cast<char *>(params->wdata);
const size_t nbw1 = ggml_row_size(PARAM_TYPE, ne10);
const size_t nbw2 = nbw1 * ne11;
assert(params->wsize >= nbw2 * ne12);
assert(params->wsize >= nbw1 * ne11);
const ggml_from_float_t from_float = ggml_get_type_traits_cpu(PARAM_TYPE)->from_float;
for (int64_t i12 = 0; i12 < ne12; i12++) {
char * data_ptr = (char *) src1->data + i12 * nb12;
char * wdata_ptr = wdata + i12 * nbw2;
int64_t i11_processed = 0;
for (int64_t i11 = ith * 4; i11 < ne11 - ne11 % 4; i11 += nth * 4) {
ggml_quantize_mat_t<INTER_SIZE, PARAM_TYPE>((float *) ((char *) src1->data + i11 * nb11), (void *) (wdata + i11 * nbw1), 4, ne10);
}
for (int64_t i11 = ith * 4; i11 < ne11 - ne11 % 4; i11 += nth * 4) {
ggml_quantize_mat_t<INTER_SIZE, PARAM_TYPE>((float *) (data_ptr + i11 * nb11),
(void *) (wdata_ptr + i11 * nbw1), 4, ne10);
}
const int64_t i11_processed = ne11 - ne11 % 4;
for (int64_t i11 = i11_processed + ith; i11 < ne11; i11 += nth) {
from_float((float *) (data_ptr + i11 * nb11), (void *) (wdata_ptr + i11 * nbw1), ne10);
}
i11_processed = ne11 - ne11 % 4;
for (int64_t i11 = i11_processed + ith; i11 < ne11; i11 += nth) {
from_float((float *) ((char *) src1->data + i11 * nb11), (void *) (wdata + i11 * nbw1), ne10);
}
// disable for NUMA
const bool disable_chunking = ggml_is_numa();
// 4x chunks per thread
const int64_t nr0 = ggml_nrows(op->src[0]);
const int64_t nr1 = ne1 * ne2 * ne3;
int nth_scaled = nth * 4;
int64_t chunk_size0 = (nr0 + nth_scaled - 1) / nth_scaled;
// avoid too small chunks for narrow src1
int64_t chunk_size1 = MAX(16, (nr1 + nth - 1) / nth);
int64_t nchunk0 = (nr0 + chunk_size0 - 1) / chunk_size0;
int64_t nchunk1 = (nr1 + chunk_size1 - 1) / chunk_size1;
int64_t nr = ggml_nrows(op->src[0]);
int nth_scaled = nth * 4;
int64_t chunk_size = (nr + nth_scaled - 1) / nth_scaled;
int64_t nchunk = (nr + chunk_size - 1) / chunk_size;
// Ensure minimum chunk size to avoid alignment issues with high thread counts
// Minimum chunk size should be at least NB_COLS to prevent overlapping chunks after alignment
const int64_t min_chunk_size = NB_COLS;
if (nchunk0 > 0 && (nr0 / nchunk0) < min_chunk_size && nr0 >= min_chunk_size) {
nchunk0 = (nr0 + min_chunk_size - 1) / min_chunk_size;
if (nchunk > 0 && (nr / nchunk) < min_chunk_size && nr >= min_chunk_size) {
nchunk = (nr + min_chunk_size - 1) / min_chunk_size;
}
if (nth == 1 || nchunk0 * nchunk1 < nth || disable_chunking) {
nchunk0 = nr0 > nr1 ? nth : 1;
nchunk1 = nr0 > nr1 ? 1 : nth;
if (nth == 1 || nchunk < nth || disable_chunking) {
nchunk = nth;
}
const int64_t dr0 = (nr0 + nchunk0 - 1) / nchunk0;
const int64_t dr1 = (nr1 + nchunk1 - 1) / nchunk1;
// Ensure nchunk doesn't exceed the number of rows divided by minimum chunk size
// This prevents creating too many tiny chunks that could overlap after alignment
const int64_t max_nchunk = (nr0 + min_chunk_size - 1) / min_chunk_size;
nchunk0 = MIN(nchunk0, max_nchunk);
const int64_t max_nchunk = (nr + min_chunk_size - 1) / min_chunk_size;
if (nchunk > max_nchunk) {
nchunk = max_nchunk;
}
if (ith == 0) {
// Every thread starts at ith, so the first unprocessed chunk is nth. This saves a bit of coordination right at the start.
@@ -1748,29 +1706,23 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
// The first chunk comes from our thread_id, the rest will get auto-assigned.
int current_chunk = ith;
while (current_chunk < nchunk0 * nchunk1) {
const int64_t ith0 = current_chunk % nchunk0;
const int64_t ith1 = current_chunk / nchunk0;
int64_t src0_start = dr0 * ith0;
int64_t src0_end = MIN(src0_start + dr0, nr0);
int64_t src1_start = dr1 * ith1;
int64_t src1_end = MIN(src1_start + dr1, nr1);
while (current_chunk < nchunk) {
int64_t src0_start = (current_chunk * ne01) / nchunk;
int64_t src0_end = ((current_chunk + 1) * ne01) / nchunk;
// Align boundaries to NB_COLS - round up to ensure all data is included
// The chunk size limiting above ensures chunks are large enough to prevent overlaps
src0_start = (src0_start % NB_COLS) ? src0_start + NB_COLS - (src0_start % NB_COLS) : src0_start;
src0_end = (src0_end % NB_COLS) ? src0_end + NB_COLS - (src0_end % NB_COLS) : src0_end;
src0_end = MIN(src0_end, ne01);
// Make sure current plane is the last one before exiting
if (src0_start >= src0_end) {
current_chunk = ggml_threadpool_chunk_add(params->threadpool, 1);
continue;
src0_end = (src0_end % NB_COLS) ? src0_end + NB_COLS - (src0_end % NB_COLS) : src0_end;
if (src0_end > ne01) {
src0_end = ne01;
}
forward_mul_mat_one_chunk(params, dst, src0_start, src0_end, src1_start, src1_end);
if (src0_start >= src0_end) {
break;
}
forward_mul_mat_one_chunk(params, dst, src0_start, src0_end);
current_chunk = ggml_threadpool_chunk_add(params->threadpool, 1);
}

View File

@@ -586,6 +586,12 @@ static __device__ __forceinline__ void ggml_cuda_mad(half2 & acc, const half2 v,
// If dst and src point at different address spaces then they are guaranteed to not be aliased.
template <int nbytes, int alignment = 0>
static __device__ __forceinline__ void ggml_cuda_memcpy_1(void * __restrict__ dst, const void * __restrict__ src) {
static_assert(
nbytes <= ggml_cuda_get_max_cpy_bytes() || alignment == 0,
"You are misusing the alignment parameter for ggml_cuda_memcpy_1. "
"The intent is for the parameter is only as a workaround if either one of the pointers is not properly aligned. "
"If you use it to do more bytes per copy than ggml_cuda_max_cpy_bytes() the reads and writes may not be coalesced. "
"Call ggml_cuda_memcpy_1 in a loop instead.");
if constexpr (alignment != 0) {
static_assert(nbytes % alignment == 0, "bad alignment");
}

View File

@@ -2992,6 +2992,36 @@ static void update_cuda_graph_executable(ggml_backend_cuda_context * cuda_ctx) {
}
#endif
// Decides whether a ROPE -> VIEW -> SET_ROWS node triple may be executed by the
// fused rope kernel. Returns true only when every condition below holds; the
// conditions are evaluated left to right and stop at the first failure.
static bool ggml_cuda_should_fuse_rope_set_rows(const ggml_tensor * rope,
                                                const ggml_tensor * view,
                                                const ggml_tensor * set_rows) {
    // rope mode is stored as the third 32-bit op parameter
    const int mode = ((const int32_t *) rope->op_params)[2];

    return
        // ne3 not tested
        rope->src[0]->ne[3] == 1 &&
        // destination must be F32 or F16
        (set_rows->type == GGML_TYPE_F32 || set_rows->type == GGML_TYPE_F16) &&
        // row indices must be I64
        set_rows->src[1]->type == GGML_TYPE_I64 &&
        // The view should flatten two dims of rope into one dim
        ggml_is_contiguous(view) && view->ne[0] == rope->ne[0] * rope->ne[1] &&
        // Only norm/neox shaders have the fusion code
        (mode == GGML_ROPE_TYPE_NORMAL || mode == GGML_ROPE_TYPE_NEOX);
}
static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std::initializer_list<enum ggml_op> ops, std::initializer_list<enum ggml_unary_op> unary_ops) {
#ifndef NDEBUG
const size_t num_unary = std::count(ops.begin(), ops.end(), GGML_OP_UNARY);
@@ -3067,6 +3097,16 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
}
}
if (ops.size() == 3 && ggml_can_fuse_subgraph(cgraph, node_idx, ops, { node_idx + 2 })) {
const ggml_tensor * rope = cgraph->nodes[node_idx];
const ggml_tensor * view = cgraph->nodes[node_idx + 1];
const ggml_tensor * set_rows = cgraph->nodes[node_idx + 2];
if (ggml_cuda_should_fuse_rope_set_rows(rope, view, set_rows)) {
return true;
}
}
if (!ggml_can_fuse(cgraph, node_idx, ops)) {
return false;
}
@@ -3196,6 +3236,15 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
continue;
}
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_ROPE, GGML_OP_VIEW, GGML_OP_SET_ROWS }, {})) {
ggml_tensor * rope = cgraph->nodes[i];
ggml_tensor * set_rows = cgraph->nodes[i + 2];
ggml_cuda_op_rope_fused(*cuda_ctx, rope, set_rows);
i += 2;
continue;
}
if (node->op == GGML_OP_ADD) {
int n_fuse = 0;
ggml_op ops[8];

View File

@@ -1,3 +1,6 @@
#include "convert.cuh"
#include "ggml-cuda/common.cuh"
#include "ggml.h"
#include "rope.cuh"
struct rope_corr_dims {
@@ -37,11 +40,23 @@ static __device__ void rope_yarn(
}
}
template<bool forward, bool has_ff, typename T>
static __global__ void rope_norm(
const T * x, T * dst, const int ne0, const int ne1, const int s1, const int s2, const int n_dims,
const int32_t * pos, const float freq_scale, const float ext_factor, const float attn_factor,
const rope_corr_dims corr_dims, const float theta_scale, const float * freq_factors) {
template <bool forward, bool has_ff, typename T, typename D>
static __global__ void rope_norm(const T * x,
D * dst,
const int ne0,
const int ne1,
const int s1,
const int s2,
const int n_dims,
const int32_t * pos,
const float freq_scale,
const float ext_factor,
const float attn_factor,
const rope_corr_dims corr_dims,
const float theta_scale,
const float * freq_factors,
const int64_t * row_indices,
const int set_rows_stride) {
const int i0 = 2*(blockDim.y*blockIdx.y + threadIdx.y);
if (i0 >= ne0) {
@@ -53,13 +68,27 @@ static __global__ void rope_norm(
const int row_x = row_dst % ne1;
const int channel_x = row_dst / ne1;
const int idst = row_dst*ne0 + i0;
int idst = row_dst * ne0 + i0;
const int ix = channel_x*s2 + row_x*s1 + i0;
if (i0 >= n_dims) {
dst[idst + 0] = x[ix + 0];
dst[idst + 1] = x[ix + 1];
// Fusion optimization: ROPE + VIEW + SET_ROWS.
// The rope output is viewed as a 1D tensor and offset based on a row index in row_indices.
if (set_rows_stride != 0) {
idst = row_x * ne0 + i0;
idst += row_indices[channel_x] * set_rows_stride;
}
const auto & store_coaelsced = [&](float x0, float x1) {
if constexpr (std::is_same_v<float, D>) {
float2 v = make_float2(x0, x1);
ggml_cuda_memcpy_1<8>(dst + idst, &v);
} else if constexpr (std::is_same_v<half, D>) {
half2 v = make_half2(x0, x1);
ggml_cuda_memcpy_1<4>(dst + idst, &v);
}
};
if (i0 >= n_dims) {
store_coaelsced(x[ix + 0], x[ix + 1]);
return;
}
@@ -75,15 +104,26 @@ static __global__ void rope_norm(
const float x0 = x[ix + 0];
const float x1 = x[ix + 1];
dst[idst + 0] = x0*cos_theta - x1*sin_theta;
dst[idst + 1] = x0*sin_theta + x1*cos_theta;
store_coaelsced(x0 * cos_theta - x1 * sin_theta, x0 * sin_theta + x1 * cos_theta);
}
template<bool forward, bool has_ff, typename T>
static __global__ void rope_neox(
const T * x, T * dst, const int ne0, const int ne1, const int s1, const int s2, const int n_dims,
const int32_t * pos, const float freq_scale, const float ext_factor, const float attn_factor,
const rope_corr_dims corr_dims, const float theta_scale, const float * freq_factors) {
template <bool forward, bool has_ff, typename T, typename D>
static __global__ void rope_neox(const T * x,
D * dst,
const int ne0,
const int ne1,
const int s1,
const int s2,
const int n_dims,
const int32_t * pos,
const float freq_scale,
const float ext_factor,
const float attn_factor,
const rope_corr_dims corr_dims,
const float theta_scale,
const float * freq_factors,
const int64_t * row_indices,
const int set_rows_stride) {
const int i0 = 2*(blockDim.y*blockIdx.y + threadIdx.y);
if (i0 >= ne0) {
@@ -95,12 +135,19 @@ static __global__ void rope_neox(
const int row_x = row_dst % ne1;
const int channel_x = row_dst / ne1;
const int idst = row_dst*ne0 + i0/2;
int idst = row_dst * ne0 + i0 / 2;
const int ix = channel_x*s2 + row_x*s1 + i0/2;
// Fusion optimization: ROPE + VIEW + SET_ROWS.
// The rope output is viewed as a 1D tensor and offset based on a row index in row_indices.
if (set_rows_stride != 0) {
idst = row_x * ne0 + i0 / 2;
idst += row_indices[channel_x] * set_rows_stride;
}
if (i0 >= n_dims) {
dst[idst + i0/2 + 0] = x[ix + i0/2 + 0];
dst[idst + i0/2 + 1] = x[ix + i0/2 + 1];
dst[idst + i0 / 2 + 0] = ggml_cuda_cast<D>(x[ix + i0 / 2 + 0]);
dst[idst + i0 / 2 + 1] = ggml_cuda_cast<D>(x[ix + i0 / 2 + 1]);
return;
}
@@ -117,8 +164,8 @@ static __global__ void rope_neox(
const float x0 = x[ix + 0];
const float x1 = x[ix + n_dims/2];
dst[idst + 0] = x0*cos_theta - x1*sin_theta;
dst[idst + n_dims/2] = x0*sin_theta + x1*cos_theta;
dst[idst + 0] = ggml_cuda_cast<D>(x0 * cos_theta - x1 * sin_theta);
dst[idst + n_dims / 2] = ggml_cuda_cast<D>(x0 * sin_theta + x1 * cos_theta);
}
template<bool forward, bool has_ff, typename T>
@@ -238,11 +285,25 @@ static __global__ void rope_vision(
dst[idst + n_dims] = x0*sin_theta + x1*cos_theta;
}
template<bool forward, typename T>
static void rope_norm_cuda(
const T * x, T * dst, const int ne0, const int ne1, const int s1, const int s2, const int n_dims, const int nr,
const int32_t * pos, const float freq_scale, const float freq_base, const float ext_factor, const float attn_factor,
const rope_corr_dims corr_dims, const float * freq_factors, cudaStream_t stream) {
template <bool forward, typename T, typename D>
static void rope_norm_cuda(const T * x,
D * dst,
const int ne0,
const int ne1,
const int s1,
const int s2,
const int n_dims,
const int nr,
const int32_t * pos,
const float freq_scale,
const float freq_base,
const float ext_factor,
const float attn_factor,
const rope_corr_dims corr_dims,
const float * freq_factors,
const int64_t * row_indices,
const int set_rows_stride,
cudaStream_t stream) {
GGML_ASSERT(ne0 % 2 == 0);
const dim3 block_dims(1, CUDA_ROPE_BLOCK_SIZE, 1);
const int n_blocks_x = (ne0 + 2*CUDA_ROPE_BLOCK_SIZE - 1) / (2*CUDA_ROPE_BLOCK_SIZE);
@@ -252,20 +313,34 @@ static void rope_norm_cuda(
if (freq_factors == nullptr) {
rope_norm<forward, false><<<block_nums, block_dims, 0, stream>>>(
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor,
attn_factor, corr_dims, theta_scale, freq_factors);
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, theta_scale,
freq_factors, row_indices, set_rows_stride);
} else {
rope_norm<forward, true><<<block_nums, block_dims, 0, stream>>>(
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor,
attn_factor, corr_dims, theta_scale, freq_factors);
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, theta_scale,
freq_factors, row_indices, set_rows_stride);
}
}
template<bool forward, typename T>
static void rope_neox_cuda(
const T * x, T * dst, const int ne0, const int ne1, const int s1, const int s2, const int n_dims, const int nr,
const int32_t * pos, const float freq_scale, const float freq_base, const float ext_factor, const float attn_factor,
const rope_corr_dims corr_dims, const float * freq_factors, cudaStream_t stream) {
template <bool forward, typename T, typename D>
static void rope_neox_cuda(const T * x,
D * dst,
const int ne0,
const int ne1,
const int s1,
const int s2,
const int n_dims,
const int nr,
const int32_t * pos,
const float freq_scale,
const float freq_base,
const float ext_factor,
const float attn_factor,
const rope_corr_dims corr_dims,
const float * freq_factors,
const int64_t * row_indices,
const int set_rows_stride,
cudaStream_t stream) {
GGML_ASSERT(ne0 % 2 == 0);
const dim3 block_dims(1, CUDA_ROPE_BLOCK_SIZE, 1);
const int n_blocks_x = (ne0 + 2*CUDA_ROPE_BLOCK_SIZE - 1) / (2*CUDA_ROPE_BLOCK_SIZE);
@@ -274,13 +349,13 @@ static void rope_neox_cuda(
const float theta_scale = powf(freq_base, -2.0f/n_dims);
if (freq_factors == nullptr) {
rope_neox<forward, false, T><<<block_nums, block_dims, 0, stream>>>(
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor,
attn_factor, corr_dims, theta_scale, freq_factors);
rope_neox<forward, false><<<block_nums, block_dims, 0, stream>>>(
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, theta_scale,
freq_factors, row_indices, set_rows_stride);
} else {
rope_neox<forward, true, T><<<block_nums, block_dims, 0, stream>>>(
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor,
attn_factor, corr_dims, theta_scale, freq_factors);
rope_neox<forward, true><<<block_nums, block_dims, 0, stream>>>(
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, theta_scale,
freq_factors, row_indices, set_rows_stride);
}
}
@@ -333,7 +408,9 @@ static void rope_vision_cuda(
}
template <bool forward>
void ggml_cuda_op_rope_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
void ggml_cuda_op_rope_impl(ggml_backend_cuda_context & ctx,
ggml_tensor * dst,
const ggml_tensor * set_rows = nullptr) {
const ggml_tensor * src0 = dst->src[0];
const ggml_tensor * src1 = dst->src[1];
const ggml_tensor * src2 = dst->src[2];
@@ -341,12 +418,25 @@ void ggml_cuda_op_rope_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
const float * src0_d = (const float *)src0->data;
const float * src1_d = (const float *)src1->data;
float * dst_d = (float *)dst->data;
void * dst_d = dst->data;
const int64_t * row_indices = nullptr;
ggml_type dst_type = dst->type;
int set_rows_stride = 0;
if (set_rows != nullptr) {
GGML_ASSERT(forward);
dst_d = set_rows->data;
row_indices = (const int64_t *) set_rows->src[1]->data;
dst_type = set_rows->type;
set_rows_stride = set_rows->nb[1] / ggml_type_size(set_rows->type);
}
cudaStream_t stream = ctx.stream();
GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
GGML_ASSERT(src0->type == dst->type);
// When not fused, src0 and dst types must match
// When fused (ROPE+VIEW+SET_ROWS), src0 may be F32 and dst may be F16
GGML_ASSERT(src0->type == dst->type || (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F16));
const int64_t ne00 = src0->ne[0]; // head dims
const int64_t ne01 = src0->ne[1]; // num heads
@@ -404,14 +494,18 @@ void ggml_cuda_op_rope_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
// compute
if (is_neox) {
if (src0->type == GGML_TYPE_F32) {
rope_neox_cuda<forward>(
(const float *) src0_d, (float *) dst_d, ne00, ne01, s01, s02, n_dims, nr, pos, freq_scale,
freq_base, ext_factor, attn_factor, corr_dims, freq_factors, stream);
} else if (src0->type == GGML_TYPE_F16) {
rope_neox_cuda<forward>(
(const half *) src0_d, (half *) dst_d, ne00, ne01, s01, s02, n_dims, nr, pos, freq_scale,
freq_base, ext_factor, attn_factor, corr_dims, freq_factors, stream);
if (src0->type == GGML_TYPE_F32 && dst_type == GGML_TYPE_F32) {
rope_neox_cuda<forward, float, float>((const float *) src0_d, (float *) dst_d, ne00, ne01, s01, s02, n_dims,
nr, pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
freq_factors, row_indices, set_rows_stride, stream);
} else if (src0->type == GGML_TYPE_F32 && dst_type == GGML_TYPE_F16) {
rope_neox_cuda<forward, float, half>((const float *) src0_d, (half *) dst_d, ne00, ne01, s01, s02, n_dims,
nr, pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
freq_factors, row_indices, set_rows_stride, stream);
} else if (src0->type == GGML_TYPE_F16 && dst_type == GGML_TYPE_F16) {
rope_neox_cuda<forward, half, half>((const half *) src0_d, (half *) dst_d, ne00, ne01, s01, s02, n_dims, nr,
pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
freq_factors, row_indices, set_rows_stride, stream);
} else {
GGML_ABORT("fatal error");
}
@@ -440,14 +534,18 @@ void ggml_cuda_op_rope_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
GGML_ABORT("fatal error");
}
} else {
if (src0->type == GGML_TYPE_F32) {
rope_norm_cuda<forward>(
(const float *) src0_d, (float *) dst_d, ne00, ne01, s01, s02, n_dims, nr, pos, freq_scale,
freq_base, ext_factor, attn_factor, corr_dims, freq_factors, stream);
} else if (src0->type == GGML_TYPE_F16) {
rope_norm_cuda<forward>(
(const half *) src0_d, (half *) dst_d, ne00, ne01, s01, s02, n_dims, nr, pos, freq_scale,
freq_base, ext_factor, attn_factor, corr_dims, freq_factors, stream);
if (src0->type == GGML_TYPE_F32 && dst_type == GGML_TYPE_F32) {
rope_norm_cuda<forward, float, float>((const float *) src0_d, (float *) dst_d, ne00, ne01, s01, s02, n_dims,
nr, pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
freq_factors, row_indices, set_rows_stride, stream);
} else if (src0->type == GGML_TYPE_F32 && dst_type == GGML_TYPE_F16) {
rope_norm_cuda<forward, float, half>((const float *) src0_d, (half *) dst_d, ne00, ne01, s01, s02, n_dims,
nr, pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
freq_factors, row_indices, set_rows_stride, stream);
} else if (src0->type == GGML_TYPE_F16 && dst_type == GGML_TYPE_F16) {
rope_norm_cuda<forward, half, half>((const half *) src0_d, (half *) dst_d, ne00, ne01, s01, s02, n_dims, nr,
pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
freq_factors, row_indices, set_rows_stride, stream);
} else {
GGML_ABORT("fatal error");
}
@@ -461,3 +559,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
// Backward RoPE entry point: dispatches to the shared implementation with its
// `forward` template flag switched off.
void ggml_cuda_op_rope_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    constexpr bool forward = false;
    ggml_cuda_op_rope_impl<forward>(ctx, dst);
}
// Fused ROPE + VIEW + SET_ROWS entry point: runs the forward RoPE implementation
// for `rope` and hands it the `set_rows` node so the result is written through it.
void ggml_cuda_op_rope_fused(ggml_backend_cuda_context & ctx, ggml_tensor * rope, ggml_tensor * set_rows) {
    constexpr bool forward = true;
    ggml_cuda_op_rope_impl<forward>(ctx, rope, set_rows);
}

View File

@@ -5,3 +5,5 @@
void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_rope_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_rope_fused(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * set_rows);

View File

@@ -12,7 +12,9 @@ vendor = {
"https://raw.githubusercontent.com/nothings/stb/refs/heads/master/stb_image.h": "vendor/stb/stb_image.h",
"https://github.com/mackron/miniaudio/raw/refs/tags/0.11.22/miniaudio.h": "vendor/miniaudio/miniaudio.h",
# not using latest tag to avoid this issue: https://github.com/ggml-org/llama.cpp/pull/17179#discussion_r2515877926
# "https://github.com/mackron/miniaudio/raw/refs/tags/0.11.23/miniaudio.h": "vendor/miniaudio/miniaudio.h",
"https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h",
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.27.0/httplib.h": "vendor/cpp-httplib/httplib.h",
}

View File

@@ -1592,9 +1592,10 @@ ggml_tensor * llm_graph_context::build_attn(
int il) const {
// these nodes are added to the graph together so that they are not reordered
// by doing so, the number of splits in the graph is reduced
// expand k later to enable rope fusion which directly writes into k-v cache
ggml_build_forward_expand(gf, q_cur);
ggml_build_forward_expand(gf, k_cur);
ggml_build_forward_expand(gf, v_cur);
ggml_build_forward_expand(gf, k_cur);
const auto * mctx_cur = inp->mctx;

View File

@@ -1013,7 +1013,7 @@ private:
}
private:
uint32_t get_node(size_t index) {
if (index > xcda_array_size) {
if (index >= xcda_array_size) {
throw std::runtime_error("Index out of array bounds in XCDA array!");
}
return xcda_array[index];

View File

@@ -7631,6 +7631,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
test_cases.emplace_back(new test_sum(GGML_TYPE_F32, it));
}
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {65000, 16, 1, 1}));
return test_cases;
}

View File

@@ -684,7 +684,7 @@ struct server_task_result {
}
// Tells whether this result is the last one produced for its task.
// The default is true: server_response_reader::next() only counts a task as
// received once is_stop() returns true, so a result type that keeps this
// default is treated as the final result of its task. Result types that emit
// several results per task (server_task_result_cmpl_*) are expected to
// override it.
virtual bool is_stop() {
    return true;
}
virtual int get_index() {
return -1;
@@ -3238,105 +3238,6 @@ struct server_context {
queue_results.send(std::move(res));
}
//
// Functions to create new task(s) and receive result(s)
//
void cancel_tasks(const std::unordered_set<int> & id_tasks) {
std::vector<server_task> cancel_tasks;
cancel_tasks.reserve(id_tasks.size());
for (const auto & id_task : id_tasks) {
SRV_WRN("cancel task, id_task = %d\n", id_task);
server_task task(SERVER_TASK_TYPE_CANCEL);
task.id_target = id_task;
queue_results.remove_waiting_task_id(id_task);
cancel_tasks.push_back(std::move(task));
}
// push to beginning of the queue, so it has highest priority
queue_tasks.post(std::move(cancel_tasks), true);
}
// Receive one result per task in `id_tasks` and hand them to `result_handler`,
// stored at the position given by each result's get_index().
//   error_handler        - called with the JSON of the first error result; remaining tasks are cancelled
//   is_connection_closed - checked after every receive attempt; when true, tasks are cancelled
// `result_handler` is not called when the wait ends early (error or closed connection).
void receive_multi_results(
        const std::unordered_set<int> & id_tasks,
        const std::function<void(std::vector<server_task_result_ptr>&)> & result_handler,
        const std::function<void(json)> & error_handler,
        const std::function<bool()> & is_connection_closed) {
    std::vector<server_task_result_ptr> results(id_tasks.size());

    // a receive attempt that yields no result does not count, so it is simply retried
    int n_received = 0;
    while (n_received < (int) id_tasks.size()) {
        server_task_result_ptr result = queue_results.recv_with_timeout(id_tasks, HTTP_POLLING_SECONDS);

        if (is_connection_closed()) {
            cancel_tasks(id_tasks);
            return;
        }

        if (result == nullptr) {
            continue;
        }

        if (result->is_error()) {
            error_handler(result->to_json());
            cancel_tasks(id_tasks);
            return;
        }

        GGML_ASSERT(
            dynamic_cast<server_task_result_cmpl_final*>(result.get()) != nullptr
            || dynamic_cast<server_task_result_embd*>(result.get()) != nullptr
            || dynamic_cast<server_task_result_rerank*>(result.get()) != nullptr
        );

        const size_t idx = result->get_index();
        GGML_ASSERT(idx < results.size() && "index out of range");
        results[idx] = std::move(result);

        n_received++;
    }

    result_handler(results);
}
// Stream-mode receiver: forwards each result of the tasks in `id_tasks` to
// `result_handler` as it arrives.
//   result_handler       - returns false to abort; the tasks are then cancelled
//   error_handler        - called with the JSON of the first error result; the tasks are then cancelled
//   is_connection_closed - checked after every receive attempt; when true, tasks are cancelled
// Returns once the number of results reporting is_stop() equals the number of tasks.
void receive_cmpl_results_stream(
        const std::unordered_set<int> & id_tasks,
        const std::function<bool(server_task_result_ptr&)> & result_handler,
        const std::function<void(json)> & error_handler,
        const std::function<bool()> & is_connection_closed) {
    size_t n_finished = 0;

    for (;;) {
        server_task_result_ptr result = queue_results.recv_with_timeout(id_tasks, HTTP_POLLING_SECONDS);

        if (is_connection_closed()) {
            cancel_tasks(id_tasks);
            return;
        }

        if (result == nullptr) {
            continue; // nothing received this round, retry
        }

        if (result->is_error()) {
            error_handler(result->to_json());
            cancel_tasks(id_tasks);
            return;
        }

        GGML_ASSERT(
            dynamic_cast<server_task_result_cmpl_partial*>(result.get()) != nullptr
            || dynamic_cast<server_task_result_cmpl_final*>(result.get()) != nullptr
        );

        const bool keep_going = result_handler(result);
        if (!keep_going) {
            cancel_tasks(id_tasks);
            return;
        }

        // the counter only advances for results that report is_stop()
        if (result->is_stop() && ++n_finished == id_tasks.size()) {
            return;
        }
    }
}
//
// Functions to process the task
//
@@ -4418,6 +4319,104 @@ struct server_context {
}
};
// generator-like API for server responses, supports polling the connection state and aggregating results
struct server_response_reader {
    std::unordered_set<int> id_tasks;   // ids of the tasks posted through this reader
    server_context & ctx_server;
    size_t received_count = 0;          // number of results received that reported is_stop()
    bool cancelled = false;             // set once stop() has queued cancellation of the tasks

    server_response_reader(server_context & ctx_server) : ctx_server(ctx_server) {}

    // stops waiting for (and cancels, if needed) the posted tasks when the reader goes away
    ~server_response_reader() {
        stop();
    }

    // remember the task ids, register them as awaited results, then queue the tasks
    void post_tasks(std::vector<server_task> && tasks) {
        id_tasks = server_task::get_list_id(tasks);
        ctx_server.queue_results.add_waiting_tasks(tasks);
        ctx_server.queue_tasks.post(std::move(tasks));
    }

    // true while the reader is not cancelled and fewer stop-results than tasks were received
    bool has_next() {
        if (cancelled) {
            return false;
        }
        return received_count < id_tasks.size();
    }

    // return nullptr if should_stop() is true before receiving a result
    // note: if one error is received, it will stop further processing and return error result
    server_task_result_ptr next(const std::function<bool()> & should_stop) {
        for (;;) {
            server_task_result_ptr result = ctx_server.queue_results.recv_with_timeout(id_tasks, HTTP_POLLING_SECONDS);

            if (result != nullptr) {
                if (result->is_error()) {
                    stop(); // cancel remaining tasks
                    SRV_DBG("%s", "received error result, stopping further processing\n");
                } else if (result->is_stop()) {
                    received_count++;
                }
                return result;
            }

            // nothing received within the timeout, check stop condition
            if (should_stop()) {
                SRV_DBG("%s", "stopping wait for next result due to should_stop condition\n");
                return nullptr;
            }
        }
    }

    struct batch_response {
        bool is_terminated = false; // if true, indicates that processing was stopped before all results were received
        std::vector<server_task_result_ptr> results;
        server_task_result_ptr error; // nullptr if no error
    };

    // collect results until has_next() is false, storing each at the slot given by its get_index()
    // ends early with is_terminated set (should_stop() fired) or with error set (error result received)
    batch_response wait_for_all(const std::function<bool()> & should_stop) {
        batch_response batch_res;
        batch_res.results.resize(id_tasks.size());

        while (has_next()) {
            auto res = next(should_stop);

            if (res == nullptr) {
                batch_res.is_terminated = true;
                break;
            }
            if (res->is_error()) {
                batch_res.error = std::move(res);
                break;
            }

            const size_t idx = res->get_index();
            GGML_ASSERT(idx < batch_res.results.size() && "index out of range");
            GGML_ASSERT(batch_res.results[idx] == nullptr && "duplicate result received");
            batch_res.results[idx] = std::move(res);
        }

        return batch_res;
    }

    // stop waiting for results; if results are still outstanding, also queue cancellation of the tasks
    void stop() {
        ctx_server.queue_results.remove_waiting_task_ids(id_tasks);

        // has_next() is already false once cancelled, so this also covers a repeated stop()
        if (!has_next()) {
            SRV_DBG("%s", "all tasks already finished, no need to cancel\n");
            return;
        }

        // tasks are not finished yet, cancel them
        cancelled = true;

        std::vector<server_task> cancel_tasks;
        cancel_tasks.reserve(id_tasks.size());
        for (const int id_task : id_tasks) {
            SRV_WRN("cancel task, id_task = %d\n", id_task);

            server_task task(SERVER_TASK_TYPE_CANCEL);
            task.id_target = id_task;

            ctx_server.queue_results.remove_waiting_task_id(id_task);
            cancel_tasks.push_back(std::move(task));
        }

        // push to beginning of the queue, so it has highest priority
        ctx_server.queue_tasks.post(std::move(cancel_tasks), true);
    }
};
static void log_server_request(const httplib::Request & req, const httplib::Response & res) {
// skip GH copilot requests when using default port
if (req.path == "/v1/health") {
@@ -5000,7 +4999,10 @@ int main(int argc, char ** argv) {
GGML_ASSERT(type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL);
auto completion_id = gen_chatcmplid();
std::unordered_set<int> task_ids;
// need to store the reader as a pointer, so that it won't be destroyed when the handle returns
// use shared_ptr as it's shared between the chunked_content_provider() and on_complete()
const auto rd = std::make_shared<server_response_reader>(ctx_server);
try {
std::vector<server_task> tasks;
@@ -5018,17 +5020,8 @@ int main(int argc, char ** argv) {
// Everything else, including multimodal completions.
inputs = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, prompt, true, true);
}
const size_t n_ctx_slot = ctx_server.slots.front().n_ctx;
tasks.reserve(inputs.size());
for (size_t i = 0; i < inputs.size(); i++) {
auto n_prompt_tokens = inputs[i].size();
if (n_prompt_tokens >= n_ctx_slot) {
json error_data = format_error_response("the request exceeds the available context size, try increasing it", ERROR_TYPE_EXCEED_CONTEXT_SIZE);
error_data["n_prompt_tokens"] = n_prompt_tokens;
error_data["n_ctx"] = n_ctx_slot;
res_error(res, error_data);
return;
}
server_task task = server_task(type);
task.id = ctx_server.queue_tasks.get_new_id();
@@ -5049,9 +5042,7 @@ int main(int argc, char ** argv) {
tasks.push_back(std::move(task));
}
task_ids = server_task::get_list_id(tasks);
ctx_server.queue_results.add_waiting_tasks(tasks);
ctx_server.queue_tasks.post(std::move(tasks));
rd->post_tasks(std::move(tasks));
} catch (const std::exception & e) {
res_error(res, format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST));
return;
@@ -5060,54 +5051,95 @@ int main(int argc, char ** argv) {
bool stream = json_value(data, "stream", false);
if (!stream) {
ctx_server.receive_multi_results(task_ids, [&](std::vector<server_task_result_ptr> & results) {
if (results.size() == 1) {
// single result
res_ok(res, results[0]->to_json());
} else {
// multiple results (multitask)
json arr = json::array();
for (auto & res : results) {
arr.push_back(res->to_json());
}
res_ok(res, arr);
// non-stream, wait for the results
auto all_results = rd->wait_for_all(is_connection_closed);
if (all_results.is_terminated) {
return; // connection is closed
} else if (all_results.error) {
res_error(res, all_results.error->to_json());
return;
} else {
json arr = json::array();
for (auto & res : all_results.results) {
GGML_ASSERT(dynamic_cast<server_task_result_cmpl_final*>(res.get()) != nullptr);
arr.push_back(res->to_json());
}
}, [&](const json & error_data) {
res_error(res, error_data);
}, is_connection_closed);
// if single request, return single object instead of array
res_ok(res, arr.size() == 1 ? arr[0] : arr);
}
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
} else {
const auto chunked_content_provider = [task_ids, &ctx_server, oaicompat](size_t, httplib::DataSink & sink) {
ctx_server.receive_cmpl_results_stream(task_ids, [&](server_task_result_ptr & result) -> bool {
json res_json = result->to_json();
if (res_json.is_array()) {
for (const auto & res : res_json) {
if (!server_sent_event(sink, res)) {
// sending failed (HTTP connection closed), cancel the generation
return false;
}
}
return true;
} else {
return server_sent_event(sink, res_json);
// in streaming mode, the first error must be treated as non-stream response
// this is to match the OAI API behavior
// ref: https://github.com/ggml-org/llama.cpp/pull/16486#discussion_r2419657309
server_task_result_ptr first_result = rd->next(is_connection_closed);
if (first_result == nullptr) {
return; // connection is closed
} else if (first_result->is_error()) {
res_error(res, first_result->to_json());
return;
} else {
GGML_ASSERT(
dynamic_cast<server_task_result_cmpl_partial*>(first_result.get()) != nullptr
|| dynamic_cast<server_task_result_cmpl_final*>(first_result.get()) != nullptr
);
}
// next responses are streamed
json first_result_json = first_result->to_json();
const auto chunked_content_provider = [first_result_json, rd, oaicompat](size_t, httplib::DataSink & sink) mutable -> bool {
// flush the first result as it's not an error
if (!first_result_json.empty()) {
if (!server_sent_event(sink, first_result_json)) {
sink.done();
return false; // sending failed, go to on_complete()
}
}, [&](const json & error_data) {
server_sent_event(sink, json{{"error", error_data}});
}, [&sink]() {
// note: do not use req.is_connection_closed here because req is already destroyed
return !sink.is_writable();
});
if (oaicompat != OAICOMPAT_TYPE_NONE) {
static const std::string ev_done = "data: [DONE]\n\n";
sink.write(ev_done.data(), ev_done.size());
first_result_json.clear(); // mark as sent
}
sink.done();
return false;
// receive subsequent results
auto result = rd->next([&sink]{ return !sink.is_writable(); });
if (result == nullptr) {
sink.done();
return false; // connection is closed, go to on_complete()
}
// send the results
json res_json = result->to_json();
bool ok = false;
if (result->is_error()) {
ok = server_sent_event(sink, json {{ "error", result->to_json() }});
sink.done();
return false; // go to on_complete()
} else {
GGML_ASSERT(
dynamic_cast<server_task_result_cmpl_partial*>(result.get()) != nullptr
|| dynamic_cast<server_task_result_cmpl_final*>(result.get()) != nullptr
);
ok = server_sent_event(sink, res_json);
}
if (!ok) {
sink.done();
return false; // sending failed, go to on_complete()
}
// check if there is more data
if (!rd->has_next()) {
if (oaicompat != OAICOMPAT_TYPE_NONE) {
static const std::string ev_done = "data: [DONE]\n\n";
sink.write(ev_done.data(), ev_done.size());
}
sink.done();
return false; // no more data, go to on_complete()
}
// has next data, continue
return true;
};
auto on_complete = [task_ids, &ctx_server] (bool) {
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
auto on_complete = [rd](bool) {
rd->stop();
};
res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
@@ -5401,8 +5433,7 @@ int main(int argc, char ** argv) {
// create and queue the task
json responses = json::array();
bool error = false;
std::unordered_set<int> task_ids;
server_response_reader rd(ctx_server);
{
std::vector<server_task> tasks;
for (size_t i = 0; i < tokenized_prompts.size(); i++) {
@@ -5418,27 +5449,23 @@ int main(int argc, char ** argv) {
tasks.push_back(std::move(task));
}
task_ids = server_task::get_list_id(tasks);
ctx_server.queue_results.add_waiting_tasks(tasks);
ctx_server.queue_tasks.post(std::move(tasks));
rd.post_tasks(std::move(tasks));
}
// get the result
ctx_server.receive_multi_results(task_ids, [&](std::vector<server_task_result_ptr> & results) {
for (auto & res : results) {
// wait for the results
auto all_results = rd.wait_for_all(req.is_connection_closed);
// collect results
if (all_results.is_terminated) {
return; // connection is closed
} else if (all_results.error) {
res_error(res, all_results.error->to_json());
return;
} else {
for (auto & res : all_results.results) {
GGML_ASSERT(dynamic_cast<server_task_result_embd*>(res.get()) != nullptr);
responses.push_back(res->to_json());
}
}, [&](const json & error_data) {
res_error(res, error_data);
error = true;
}, req.is_connection_closed);
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
if (error) {
return;
}
// write JSON response
@@ -5492,8 +5519,7 @@ int main(int argc, char ** argv) {
// create and queue the task
json responses = json::array();
bool error = false;
std::unordered_set<int> task_ids;
server_response_reader rd(ctx_server);
{
std::vector<server_task> tasks;
tasks.reserve(documents.size());
@@ -5505,24 +5531,23 @@ int main(int argc, char ** argv) {
task.tokens = std::move(tmp);
tasks.push_back(std::move(task));
}
task_ids = server_task::get_list_id(tasks);
ctx_server.queue_results.add_waiting_tasks(tasks);
ctx_server.queue_tasks.post(std::move(tasks));
rd.post_tasks(std::move(tasks));
}
ctx_server.receive_multi_results(task_ids, [&](std::vector<server_task_result_ptr> & results) {
for (auto & res : results) {
// wait for the results
auto all_results = rd.wait_for_all(req.is_connection_closed);
// collect results
if (all_results.is_terminated) {
return; // connection is closed
} else if (all_results.error) {
res_error(res, all_results.error->to_json());
return;
} else {
for (auto & res : all_results.results) {
GGML_ASSERT(dynamic_cast<server_task_result_rerank*>(res.get()) != nullptr);
responses.push_back(res->to_json());
}
}, [&](const json & error_data) {
res_error(res, error_data);
error = true;
}, req.is_connection_closed);
if (error) {
return;
}
// write JSON response

View File

@@ -453,15 +453,29 @@ static std::string tokens_to_output_formatted_string(const llama_context * ctx,
return out;
}
// Writes `data` to `sink` as server-sent event(s).
// note: if data is a json array, it will be sent as multiple events, one per item
// Returns false as soon as one write to the sink fails, true otherwise
// (an empty array writes nothing and returns true).
static bool server_sent_event(httplib::DataSink & sink, const json & data) {
    // serialize a single JSON value as one "data: ..." event and write it to the sink
    static auto send_single = [](httplib::DataSink & sink, const json & data) -> bool {
        const std::string str =
            "data: " +
            data.dump(-1, ' ', false, json::error_handler_t::replace) +
            "\n\n"; // required by RFC 8895 - A message is terminated by a blank line (two line terminators in a row).

        LOG_DBG("data stream, to_send: %s", str.c_str());

        return sink.write(str.c_str(), str.size());
    };

    if (!data.is_array()) {
        return send_single(sink, data);
    }

    // one event per array item; stop at the first failed write
    for (const auto & item : data) {
        if (!send_single(sink, item)) {
            return false;
        }
    }

    return true;
}
//

View File

@@ -11,8 +11,16 @@ const preview: Preview = {
date: /Date$/i
}
},
backgrounds: {
disable: true
},
a11y: {
// 'todo' - show a11y violations in the test UI only
// 'error' - fail CI on a11y violations
// 'off' - skip a11y checks entirely
test: 'todo'
}
},
decorators: [

View File

@@ -1,8 +1,9 @@
import * as a11yAddonAnnotations from '@storybook/addon-a11y/preview';
import { setProjectAnnotations } from '@storybook/sveltekit';
import * as previewAnnotations from './preview';
import { beforeAll } from 'vitest';
// Project annotations used by the Vitest run: the a11y addon's annotations
// followed by the project's own preview annotations. `project` must be
// declared only once; a second `const project` is a redeclaration error.
const project = setProjectAnnotations([a11yAddonAnnotations, previewAnnotations]);
beforeAll(async () => {
if (project.beforeAll) {

View File

@@ -22,20 +22,20 @@
"unist-util-visit": "^5.0.0"
},
"devDependencies": {
"@chromatic-com/storybook": "^4.0.1",
"@chromatic-com/storybook": "^4.1.2",
"@eslint/compat": "^1.2.5",
"@eslint/js": "^9.18.0",
"@internationalized/date": "^3.8.2",
"@lucide/svelte": "^0.515.0",
"@playwright/test": "^1.49.1",
"@storybook/addon-a11y": "^9.0.17",
"@storybook/addon-docs": "^9.0.17",
"@storybook/addon-svelte-csf": "^5.0.7",
"@storybook/addon-vitest": "^9.0.17",
"@storybook/sveltekit": "^9.0.17",
"@sveltejs/adapter-static": "^3.0.8",
"@sveltejs/kit": "^2.22.0",
"@sveltejs/vite-plugin-svelte": "^6.0.0",
"@storybook/addon-a11y": "^10.0.7",
"@storybook/addon-docs": "^10.0.7",
"@storybook/addon-svelte-csf": "^5.0.10",
"@storybook/addon-vitest": "^10.0.7",
"@storybook/sveltekit": "^10.0.7",
"@sveltejs/adapter-static": "^3.0.10",
"@sveltejs/kit": "^2.48.4",
"@sveltejs/vite-plugin-svelte": "^6.2.1",
"@tailwindcss/forms": "^0.5.9",
"@tailwindcss/typography": "^0.5.15",
"@tailwindcss/vite": "^4.0.0",
@@ -46,21 +46,21 @@
"dexie": "^4.0.11",
"eslint": "^9.18.0",
"eslint-config-prettier": "^10.0.1",
"eslint-plugin-storybook": "^9.0.17",
"eslint-plugin-storybook": "^10.0.7",
"eslint-plugin-svelte": "^3.0.0",
"fflate": "^0.8.2",
"globals": "^16.0.0",
"http-server": "^14.1.1",
"mdast": "^3.0.0",
"mdsvex": "^0.12.3",
"playwright": "^1.53.0",
"playwright": "^1.56.1",
"prettier": "^3.4.2",
"prettier-plugin-svelte": "^3.3.3",
"prettier-plugin-tailwindcss": "^0.6.11",
"rehype-katex": "^7.0.1",
"remark-math": "^6.0.0",
"sass": "^1.93.3",
"storybook": "^9.0.17",
"storybook": "^10.0.7",
"svelte": "^5.0.0",
"svelte-check": "^4.0.0",
"tailwind-merge": "^3.3.1",
@@ -71,7 +71,7 @@
"typescript-eslint": "^8.20.0",
"unified": "^11.0.5",
"uuid": "^13.0.0",
"vite": "^7.0.4",
"vite": "^7.2.2",
"vite-plugin-devtools-json": "^0.2.0",
"vitest": "^3.2.3",
"vitest-browser-svelte": "^0.1.0"
@@ -133,9 +133,9 @@
}
},
"node_modules/@chromatic-com/storybook": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/@chromatic-com/storybook/-/storybook-4.0.1.tgz",
"integrity": "sha512-GQXe5lyZl3yLewLJQyFXEpOp2h+mfN2bPrzYaOFNCJjO4Js9deKbRHTOSaiP2FRwZqDLdQwy2+SEGeXPZ94yYw==",
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/@chromatic-com/storybook/-/storybook-4.1.2.tgz",
"integrity": "sha512-QAWGtHwib0qsP5CcO64aJCF75zpFgpKK3jNpxILzQiPK3sVo4EmnVGJVdwcZWpWrGdH8E4YkncGoitw4EXzKMg==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -150,7 +150,7 @@
"yarn": ">=1.22.18"
},
"peerDependencies": {
"storybook": "^0.0.0-0 || ^9.0.0 || ^9.1.0-0"
"storybook": "^0.0.0-0 || ^9.0.0 || ^9.1.0-0 || ^9.2.0-0 || ^10.0.0-0 || ^10.1.0-0 || ^10.2.0-0 || ^10.3.0-0"
}
},
"node_modules/@esbuild/aix-ppc64": {
@@ -894,6 +894,17 @@
"@jridgewell/trace-mapping": "^0.3.24"
}
},
"node_modules/@jridgewell/remapping": {
"version": "2.3.5",
"resolved": "https://registry.npmjs.org/@jridgewell/remapping/-/remapping-2.3.5.tgz",
"integrity": "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"@jridgewell/gen-mapping": "^0.3.5",
"@jridgewell/trace-mapping": "^0.3.24"
}
},
"node_modules/@jridgewell/resolve-uri": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz",
@@ -1502,13 +1513,13 @@
}
},
"node_modules/@playwright/test": {
"version": "1.54.1",
"resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.54.1.tgz",
"integrity": "sha512-FS8hQ12acieG2dYSksmLOF7BNxnVf2afRJdCuM1eMSxj6QTSE6G4InGF7oApGgDb65MX7AwMVlIkpru0yZA4Xw==",
"version": "1.56.1",
"resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.56.1.tgz",
"integrity": "sha512-vSMYtL/zOcFpvJCW71Q/OEGQb7KYBPAdKh35WNSkaZA75JlAO8ED8UN6GUNTm3drWomcbcqRPFqQbLae8yBTdg==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"playwright": "1.54.1"
"playwright": "1.56.1"
},
"bin": {
"playwright": "cli.js"
@@ -1812,9 +1823,9 @@
"license": "MIT"
},
"node_modules/@storybook/addon-a11y": {
"version": "9.0.17",
"resolved": "https://registry.npmjs.org/@storybook/addon-a11y/-/addon-a11y-9.0.17.tgz",
"integrity": "sha512-9cXNK3q/atx3hwJAt9HkJbd9vUxCXfKKiNNuSACbf8h9/j6u3jktulKOf6Xjc3B8lwn6ZpdK/x1HHZN2kTqsvg==",
"version": "10.0.7",
"resolved": "https://registry.npmjs.org/@storybook/addon-a11y/-/addon-a11y-10.0.7.tgz",
"integrity": "sha512-JsYPpZ/n67/2bI1XJeyrAWHHQkHemPkPHjCA0tAUnMz1Shlo/LV2q1Ahgpxoihx4strbHwZz71bcS4MqkHBduA==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -1826,20 +1837,20 @@
"url": "https://opencollective.com/storybook"
},
"peerDependencies": {
"storybook": "^9.0.17"
"storybook": "^10.0.7"
}
},
"node_modules/@storybook/addon-docs": {
"version": "9.0.17",
"resolved": "https://registry.npmjs.org/@storybook/addon-docs/-/addon-docs-9.0.17.tgz",
"integrity": "sha512-LOX/kKgQGnyulrqZHsvf77+ZoH/nSUaplGr5hvZglW/U6ak6fO9seJyXAzVKEnC6p+F8n02kFBZbi3s+znQhSg==",
"version": "10.0.7",
"resolved": "https://registry.npmjs.org/@storybook/addon-docs/-/addon-docs-10.0.7.tgz",
"integrity": "sha512-qQQMoeYZC4W+/8ubfOZiTrE8nYC/f4wWP1uq4peRyDy1N2nIN9SwhyxwMn0m3VpeGmRBga5dLvJY9ko6SnJekg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@mdx-js/react": "^3.0.0",
"@storybook/csf-plugin": "9.0.17",
"@storybook/icons": "^1.2.12",
"@storybook/react-dom-shim": "9.0.17",
"@storybook/csf-plugin": "10.0.7",
"@storybook/icons": "^1.6.0",
"@storybook/react-dom-shim": "10.0.7",
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
"react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
"ts-dedent": "^2.0.0"
@@ -1849,13 +1860,13 @@
"url": "https://opencollective.com/storybook"
},
"peerDependencies": {
"storybook": "^9.0.17"
"storybook": "^10.0.7"
}
},
"node_modules/@storybook/addon-svelte-csf": {
"version": "5.0.7",
"resolved": "https://registry.npmjs.org/@storybook/addon-svelte-csf/-/addon-svelte-csf-5.0.7.tgz",
"integrity": "sha512-6Zmy5HjOlrrG6OoKRTGDr9LR6zRK4/Sa7raFzQRKHGASgMlfKsMdNTNO0sxnMUWCu2JMS6HsuoLtB3Ma8SlYtg==",
"version": "5.0.10",
"resolved": "https://registry.npmjs.org/@storybook/addon-svelte-csf/-/addon-svelte-csf-5.0.10.tgz",
"integrity": "sha512-poSvTS7VdaQ42ZoqW5e4+2Hv1iLO0mekH9fwn/QuBNse48R4WlTyR8XFbHRTfatl9gdc9ZYC4uWzazrmV6zGIA==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -1868,22 +1879,22 @@
"zimmerframe": "^1.1.2"
},
"peerDependencies": {
"@storybook/svelte": "^0.0.0-0 || ^8.2.0 || ^9.0.0 || ^9.1.0-0",
"@storybook/svelte": "^0.0.0-0 || ^8.2.0 || ^9.0.0 || ^9.1.0-0 || ^10.0.0-0",
"@sveltejs/vite-plugin-svelte": "^4.0.0 || ^5.0.0 || ^6.0.0",
"storybook": "^0.0.0-0 || ^8.2.0 || ^9.0.0 || ^9.1.0-0",
"storybook": "^0.0.0-0 || ^8.2.0 || ^9.0.0 || ^9.1.0-0 || ^10.0.0-0",
"svelte": "^5.0.0",
"vite": "^5.0.0 || ^6.0.0 || ^7.0.0"
}
},
"node_modules/@storybook/addon-vitest": {
"version": "9.0.17",
"resolved": "https://registry.npmjs.org/@storybook/addon-vitest/-/addon-vitest-9.0.17.tgz",
"integrity": "sha512-eogqcGbACR1sTedBSE2SP/4QV1ruicHYEhYjBtoPIjvYgymN1g5KSuQNysLx4f0SvAzczrcNjX2WVVLX2DVyzA==",
"version": "10.0.7",
"resolved": "https://registry.npmjs.org/@storybook/addon-vitest/-/addon-vitest-10.0.7.tgz",
"integrity": "sha512-i6v/mAl+elrUxb+1f4NdnM17t/fg+KGJWL1U9quflXTd3KiLY0xJB4LwNP6yYo7Imc5NIO2fRkJbGvNqLBRe2Q==",
"dev": true,
"license": "MIT",
"dependencies": {
"@storybook/global": "^5.0.0",
"@storybook/icons": "^1.4.0",
"@storybook/icons": "^1.6.0",
"prompts": "^2.4.0",
"ts-dedent": "^2.2.0"
},
@@ -1892,15 +1903,19 @@
"url": "https://opencollective.com/storybook"
},
"peerDependencies": {
"@vitest/browser": "^3.0.0",
"@vitest/runner": "^3.0.0",
"storybook": "^9.0.17",
"vitest": "^3.0.0"
"@vitest/browser": "^3.0.0 || ^4.0.0",
"@vitest/browser-playwright": "^4.0.0",
"@vitest/runner": "^3.0.0 || ^4.0.0",
"storybook": "^10.0.7",
"vitest": "^3.0.0 || ^4.0.0"
},
"peerDependenciesMeta": {
"@vitest/browser": {
"optional": true
},
"@vitest/browser-playwright": {
"optional": true
},
"@vitest/runner": {
"optional": true
},
@@ -1910,13 +1925,13 @@
}
},
"node_modules/@storybook/builder-vite": {
"version": "9.0.17",
"resolved": "https://registry.npmjs.org/@storybook/builder-vite/-/builder-vite-9.0.17.tgz",
"integrity": "sha512-lyuvgGhb0NaVk1tdB4xwzky6+YXQfxlxfNQqENYZ9uYQZdPfErMa4ZTXVQTV+CQHAa2NL+p/dG2JPAeu39e9UA==",
"version": "10.0.7",
"resolved": "https://registry.npmjs.org/@storybook/builder-vite/-/builder-vite-10.0.7.tgz",
"integrity": "sha512-wk2TAoUY5+9t78GWVBndu9rEo9lo6Ec3SRrLT4VpIlcS2GPK+5f26UC2uvIBwOF/N7JrUUKq/zWDZ3m+do9QDg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@storybook/csf-plugin": "9.0.17",
"@storybook/csf-plugin": "10.0.7",
"ts-dedent": "^2.0.0"
},
"funding": {
@@ -1924,7 +1939,7 @@
"url": "https://opencollective.com/storybook"
},
"peerDependencies": {
"storybook": "^9.0.17",
"storybook": "^10.0.7",
"vite": "^5.0.0 || ^6.0.0 || ^7.0.0"
}
},
@@ -1939,20 +1954,38 @@
}
},
"node_modules/@storybook/csf-plugin": {
"version": "9.0.17",
"resolved": "https://registry.npmjs.org/@storybook/csf-plugin/-/csf-plugin-9.0.17.tgz",
"integrity": "sha512-6Q4eo1ObrLlsnB6bIt6T8+45XAb4to2pQGNrI7QPkLQRLrZinrJcNbLY7AGkyIoCOEsEbq08n09/nClQUbu8HA==",
"version": "10.0.7",
"resolved": "https://registry.npmjs.org/@storybook/csf-plugin/-/csf-plugin-10.0.7.tgz",
"integrity": "sha512-YaYYlCyJBwxaMk7yREOdz+9MDSgxIYGdeJ9EIq/bUndmkoj9SRo1P9/0lC5dseWQoiGy4T3PbZiWruD8uM5m3g==",
"dev": true,
"license": "MIT",
"dependencies": {
"unplugin": "^1.3.1"
"unplugin": "^2.3.5"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/storybook"
},
"peerDependencies": {
"storybook": "^9.0.17"
"esbuild": "*",
"rollup": "*",
"storybook": "^10.0.7",
"vite": "*",
"webpack": "*"
},
"peerDependenciesMeta": {
"esbuild": {
"optional": true
},
"rollup": {
"optional": true
},
"vite": {
"optional": true
},
"webpack": {
"optional": true
}
}
},
"node_modules/@storybook/global": {
@@ -1963,9 +1996,9 @@
"license": "MIT"
},
"node_modules/@storybook/icons": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/@storybook/icons/-/icons-1.4.0.tgz",
"integrity": "sha512-Td73IeJxOyalzvjQL+JXx72jlIYHgs+REaHiREOqfpo3A2AYYG71AUbcv+lg7mEDIweKVCxsMQ0UKo634c8XeA==",
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/@storybook/icons/-/icons-1.6.0.tgz",
"integrity": "sha512-hcFZIjW8yQz8O8//2WTIXylm5Xsgc+lW9ISLgUk1xGmptIJQRdlhVIXCpSyLrQaaRiyhQRaVg7l3BD9S216BHw==",
"dev": true,
"license": "MIT",
"engines": {
@@ -1977,9 +2010,9 @@
}
},
"node_modules/@storybook/react-dom-shim": {
"version": "9.0.17",
"resolved": "https://registry.npmjs.org/@storybook/react-dom-shim/-/react-dom-shim-9.0.17.tgz",
"integrity": "sha512-ak/x/m6MDDxdE6rCDymTltaiQF3oiKrPHSwfM+YPgQR6MVmzTTs4+qaPfeev7FZEHq23IkfDMTmSTTJtX7Vs9A==",
"version": "10.0.7",
"resolved": "https://registry.npmjs.org/@storybook/react-dom-shim/-/react-dom-shim-10.0.7.tgz",
"integrity": "sha512-bp4OnMtZGwPJQDqNRi4K5iibLbZ2TZZMkWW7oSw5jjPFpGSreSjCe8LH9yj/lDnK8Ox9bGMCBFE5RV5XuML29w==",
"dev": true,
"license": "MIT",
"funding": {
@@ -1987,126 +2020,75 @@
"url": "https://opencollective.com/storybook"
},
"peerDependencies": {
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta",
"react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta",
"storybook": "^9.0.17"
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
"react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
"storybook": "^10.0.7"
}
},
"node_modules/@storybook/svelte": {
"version": "9.0.17",
"resolved": "https://registry.npmjs.org/@storybook/svelte/-/svelte-9.0.17.tgz",
"integrity": "sha512-RwOswdq7S3+ZOuoM/oRrcmlsKdjcd/3wMHbuirzYoAhdwsjubSuRepMV64O9RnlXd3x7rZw4fXpq1M/SVo5XiQ==",
"version": "10.0.7",
"resolved": "https://registry.npmjs.org/@storybook/svelte/-/svelte-10.0.7.tgz",
"integrity": "sha512-rO+YQhHucy47Vh67z318pALmd6x+K1Kj30Fb4a6oOEw4xn4zCo9KTmkMWs24c4oduEXD/eJu3badlRmsVXzyfA==",
"dev": true,
"license": "MIT",
"dependencies": {
"ts-dedent": "^2.0.0",
"type-fest": "~2.19"
},
"engines": {
"node": ">=20.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/storybook"
},
"peerDependencies": {
"storybook": "^9.0.17",
"storybook": "^10.0.7",
"svelte": "^5.0.0"
}
},
"node_modules/@storybook/sveltekit": {
"version": "9.0.17",
"resolved": "https://registry.npmjs.org/@storybook/sveltekit/-/sveltekit-9.0.17.tgz",
"integrity": "sha512-CUOATuW5Qk3SjNvmjH+wyx2GCsMF1cvw3gwkujV9kehPebzV20NhgHpbzSoepvwF7+Bj6jl8V6UxiMWk0jJFmA==",
"node_modules/@storybook/svelte-vite": {
"version": "10.0.7",
"resolved": "https://registry.npmjs.org/@storybook/svelte-vite/-/svelte-vite-10.0.7.tgz",
"integrity": "sha512-q9/RtrhX1CnznO6AO9MDEy1bsccbGeRxW28FLpgUrztV4IGZ/dFUrFIFurKRyuA3/nFsbtzp1F5jFt3RExmmTw==",
"dev": true,
"license": "MIT",
"dependencies": {
"@storybook/builder-vite": "9.0.17",
"@storybook/svelte": "9.0.17",
"@storybook/svelte-vite": "9.0.17"
},
"engines": {
"node": ">=20.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/storybook"
},
"peerDependencies": {
"storybook": "^9.0.17",
"svelte": "^5.0.0",
"vite": "^5.0.0 || ^6.0.0 || ^7.0.0"
}
},
"node_modules/@storybook/sveltekit/node_modules/@storybook/svelte-vite": {
"version": "9.0.17",
"resolved": "https://registry.npmjs.org/@storybook/svelte-vite/-/svelte-vite-9.0.17.tgz",
"integrity": "sha512-fRIxOZy9IRI6BfL1LgFn+B+IckGOlT1SstD01y9ddO4pVKWih/l+vb44bnZs+Z0faJZbrG/LgfnXTOPj052Z8g==",
"dev": true,
"license": "MIT",
"dependencies": {
"@storybook/builder-vite": "9.0.17",
"@storybook/svelte": "9.0.17",
"@storybook/builder-vite": "10.0.7",
"@storybook/svelte": "10.0.7",
"magic-string": "^0.30.0",
"svelte2tsx": "^0.7.35",
"svelte2tsx": "^0.7.44",
"typescript": "^4.9.4 || ^5.0.0"
},
"engines": {
"node": ">=20.0.0"
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/storybook"
},
"peerDependencies": {
"@sveltejs/vite-plugin-svelte": "^2.0.0 || ^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0",
"storybook": "^10.0.7",
"svelte": "^5.0.0",
"vite": "^5.0.0 || ^6.0.0 || ^7.0.0"
}
},
"node_modules/@storybook/sveltekit": {
"version": "10.0.7",
"resolved": "https://registry.npmjs.org/@storybook/sveltekit/-/sveltekit-10.0.7.tgz",
"integrity": "sha512-ujTW7PfWvgBrzd7jzaZe9JgjUeM5YvBKm+xru6t7Dr4bdfmkKqlZHPRdXn/sy+fQNyfg6JL2WKy2KIIeA+RvSg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@storybook/builder-vite": "10.0.7",
"@storybook/svelte": "10.0.7",
"@storybook/svelte-vite": "10.0.7"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/storybook"
},
"peerDependencies": {
"@sveltejs/vite-plugin-svelte": "^2.0.0 || ^3.0.0 || ^4.0.0 || ^5.0.0",
"storybook": "^9.0.17",
"storybook": "^10.0.7",
"svelte": "^5.0.0",
"vite": "^5.0.0 || ^6.0.0 || ^7.0.0"
}
},
"node_modules/@storybook/sveltekit/node_modules/@sveltejs/vite-plugin-svelte": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte/-/vite-plugin-svelte-5.1.1.tgz",
"integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@sveltejs/vite-plugin-svelte-inspector": "^4.0.1",
"debug": "^4.4.1",
"deepmerge": "^4.3.1",
"kleur": "^4.1.5",
"magic-string": "^0.30.17",
"vitefu": "^1.0.6"
},
"engines": {
"node": "^18.0.0 || ^20.0.0 || >=22"
},
"peerDependencies": {
"svelte": "^5.0.0",
"vite": "^6.0.0"
}
},
"node_modules/@storybook/sveltekit/node_modules/@sveltejs/vite-plugin-svelte/node_modules/@sveltejs/vite-plugin-svelte-inspector": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte-inspector/-/vite-plugin-svelte-inspector-4.0.1.tgz",
"integrity": "sha512-J/Nmb2Q2y7mck2hyCX4ckVHcR5tu2J+MtBEQqpDrrgELZ2uvraQcK/ioCV61AqkdXFgriksOKIceDcQmqnGhVw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"debug": "^4.3.7"
},
"engines": {
"node": "^18.0.0 || ^20.0.0 || >=22"
},
"peerDependencies": {
"@sveltejs/vite-plugin-svelte": "^5.0.0",
"svelte": "^5.0.0",
"vite": "^6.0.0"
}
},
"node_modules/@sveltejs/acorn-typescript": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/@sveltejs/acorn-typescript/-/acorn-typescript-1.0.5.tgz",
@@ -2117,9 +2099,9 @@
}
},
"node_modules/@sveltejs/adapter-static": {
"version": "3.0.9",
"resolved": "https://registry.npmjs.org/@sveltejs/adapter-static/-/adapter-static-3.0.9.tgz",
"integrity": "sha512-aytHXcMi7lb9ljsWUzXYQ0p5X1z9oWud2olu/EpmH7aCu4m84h7QLvb5Wp+CFirKcwoNnYvYWhyP/L8Vh1ztdw==",
"version": "3.0.10",
"resolved": "https://registry.npmjs.org/@sveltejs/adapter-static/-/adapter-static-3.0.10.tgz",
"integrity": "sha512-7D9lYFWJmB7zxZyTE/qxjksvMqzMuYrrsyh1f4AlZqeZeACPRySjbC3aFiY55wb1tWUaKOQG9PVbm74JcN2Iew==",
"dev": true,
"license": "MIT",
"peerDependencies": {
@@ -2127,9 +2109,9 @@
}
},
"node_modules/@sveltejs/kit": {
"version": "2.37.0",
"resolved": "https://registry.npmjs.org/@sveltejs/kit/-/kit-2.37.0.tgz",
"integrity": "sha512-xgKtpjQ6Ry4mdShd01ht5AODUsW7+K1iValPDq7QX8zI1hWOKREH9GjG8SRCN5tC4K7UXmMhuQam7gbLByVcnw==",
"version": "2.48.4",
"resolved": "https://registry.npmjs.org/@sveltejs/kit/-/kit-2.48.4.tgz",
"integrity": "sha512-TGFX1pZUt9qqY20Cv5NyYvy0iLWHf2jXi8s+eCGsig7jQMdwZWKUFMR6TbvFNhfDSUpc1sH/Y5EHv20g3HHA3g==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -2166,16 +2148,15 @@
}
},
"node_modules/@sveltejs/vite-plugin-svelte": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte/-/vite-plugin-svelte-6.1.0.tgz",
"integrity": "sha512-+U6lz1wvGEG/BvQyL4z/flyNdQ9xDNv5vrh+vWBWTHaebqT0c9RNggpZTo/XSPoHsSCWBlYaTlRX8pZ9GATXCw==",
"version": "6.2.1",
"resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte/-/vite-plugin-svelte-6.2.1.tgz",
"integrity": "sha512-YZs/OSKOQAQCnJvM/P+F1URotNnYNeU3P2s4oIpzm1uFaqUEqRxUB0g5ejMjEb5Gjb9/PiBI5Ktrq4rUUF8UVQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"@sveltejs/vite-plugin-svelte-inspector": "^5.0.0-next.1",
"@sveltejs/vite-plugin-svelte-inspector": "^5.0.0",
"debug": "^4.4.1",
"deepmerge": "^4.3.1",
"kleur": "^4.1.5",
"magic-string": "^0.30.17",
"vitefu": "^1.1.1"
},
@@ -3361,19 +3342,6 @@
"node": ">= 0.8"
}
},
"node_modules/better-opn": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/better-opn/-/better-opn-3.0.2.tgz",
"integrity": "sha512-aVNobHnJqLiUelTaHat9DZ1qM2w0C0Eym4LPI/3JxOnSokGVdsl1T1kN7TFvsEAD8G47A6VKQ0TVHqbBnYMJlQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"open": "^8.0.4"
},
"engines": {
"node": ">=12.0.0"
}
},
"node_modules/bits-ui": {
"version": "2.8.11",
"resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-2.8.11.tgz",
@@ -3844,16 +3812,6 @@
"node": ">=0.10.0"
}
},
"node_modules/define-lazy-prop": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz",
"integrity": "sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=8"
}
},
"node_modules/dequal": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz",
@@ -4042,19 +4000,6 @@
"@esbuild/win32-x64": "0.25.8"
}
},
"node_modules/esbuild-register": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/esbuild-register/-/esbuild-register-3.6.0.tgz",
"integrity": "sha512-H2/S7Pm8a9CL1uhp9OvjwrBh5Pvx0H8qVOxNu8Wed9Y7qv56MPtq+GGM8RJpq6glYJn9Wspr8uw7l55uyinNeg==",
"dev": true,
"license": "MIT",
"dependencies": {
"debug": "^4.3.4"
},
"peerDependencies": {
"esbuild": ">=0.12 <1"
}
},
"node_modules/escape-string-regexp": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
@@ -4146,20 +4091,17 @@
}
},
"node_modules/eslint-plugin-storybook": {
"version": "9.0.17",
"resolved": "https://registry.npmjs.org/eslint-plugin-storybook/-/eslint-plugin-storybook-9.0.17.tgz",
"integrity": "sha512-IuTdlwCEwoDNobdygRCxNhlKXHmsDfPtPvHGcsY35x2Bx8KItrjfekO19gJrjc1VT2CMfcZMYF8OBKaxHELupw==",
"version": "10.0.7",
"resolved": "https://registry.npmjs.org/eslint-plugin-storybook/-/eslint-plugin-storybook-10.0.7.tgz",
"integrity": "sha512-qOQq9KdT1jsBgT3qsxUH2n67aj1WR8D1XCoER8Q6yuVlS5TimNwk1mZeWkXVf/o4RQQT6flT2y5cG2gPLZPvJA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/utils": "^8.8.1"
},
"engines": {
"node": ">=20.0.0"
},
"peerDependencies": {
"eslint": ">=8",
"storybook": "^9.0.17"
"storybook": "^10.0.7"
}
},
"node_modules/eslint-plugin-svelte": {
@@ -4405,11 +4347,14 @@
}
},
"node_modules/fdir": {
"version": "6.4.6",
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.6.tgz",
"integrity": "sha512-hiFoqpyZcfNm1yc4u8oWCf9A2c4D3QjCrks3zmoVKVxpQRzmPNar1hUJcBG2RQHvEVGDN+Jm81ZheVLAQMK6+w==",
"version": "6.5.0",
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
"integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=12.0.0"
},
"peerDependencies": {
"picomatch": "^3 || ^4"
},
@@ -5072,22 +5017,6 @@
"integrity": "sha512-0aO8FkhNZlj/ZIbNi7Lxxr12obT7cL1moPfE4tg1LkX7LlLfC6DeX4l2ZEud1ukP9jNQyNnfzQVqwbwmAATY4Q==",
"license": "MIT"
},
"node_modules/is-docker": {
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/is-docker/-/is-docker-2.2.1.tgz",
"integrity": "sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==",
"dev": true,
"license": "MIT",
"bin": {
"is-docker": "cli.js"
},
"engines": {
"node": ">=8"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/is-extglob": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
@@ -5133,19 +5062,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/is-wsl": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-2.2.0.tgz",
"integrity": "sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==",
"dev": true,
"license": "MIT",
"dependencies": {
"is-docker": "^2.0.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/isexe": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
@@ -5591,16 +5507,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/lower-case": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/lower-case/-/lower-case-2.0.2.tgz",
"integrity": "sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg==",
"dev": true,
"license": "MIT",
"dependencies": {
"tslib": "^2.0.3"
}
},
"node_modules/lowlight": {
"version": "3.3.0",
"resolved": "https://registry.npmjs.org/lowlight/-/lowlight-3.3.0.tgz",
@@ -6783,17 +6689,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/no-case": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz",
"integrity": "sha512-fgAN3jGAh+RoxUGZHTSOLJIqUc2wmoBwGR4tbpNAKmmovFoWq0OdRkb0VkldReO2a2iBT/OEulG9XSUc10r3zg==",
"dev": true,
"license": "MIT",
"dependencies": {
"lower-case": "^2.0.2",
"tslib": "^2.0.3"
}
},
"node_modules/node-addon-api": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz",
@@ -6815,24 +6710,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/open": {
"version": "8.4.2",
"resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz",
"integrity": "sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"define-lazy-prop": "^2.0.0",
"is-docker": "^2.1.1",
"is-wsl": "^2.2.0"
},
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/opener": {
"version": "1.5.2",
"resolved": "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz",
@@ -6919,17 +6796,6 @@
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/pascal-case": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/pascal-case/-/pascal-case-3.1.2.tgz",
"integrity": "sha512-uWlGT3YSnK9x3BQJaOdcZwrnV6hPpd8jFH1/ucpiLRPh/2zCVJKS19E4GvYHvaCcACn3foXZ0cLB9Wrx1KGe5g==",
"dev": true,
"license": "MIT",
"dependencies": {
"no-case": "^3.0.4",
"tslib": "^2.0.3"
}
},
"node_modules/path-exists": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@@ -7000,13 +6866,13 @@
}
},
"node_modules/playwright": {
"version": "1.54.1",
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.54.1.tgz",
"integrity": "sha512-peWpSwIBmSLi6aW2auvrUtf2DqY16YYcCMO8rTVx486jKmDTJg7UAhyrraP98GB8BoPURZP8+nxO7TSd4cPr5g==",
"version": "1.56.1",
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.56.1.tgz",
"integrity": "sha512-aFi5B0WovBHTEvpM3DzXTUaeN6eN0qWnTkKx4NQaH4Wvcmc153PdaY2UBdSYKaGYw+UyWXSVyxDUg5DoPEttjw==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"playwright-core": "1.54.1"
"playwright-core": "1.56.1"
},
"bin": {
"playwright": "cli.js"
@@ -7019,9 +6885,9 @@
}
},
"node_modules/playwright-core": {
"version": "1.54.1",
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.54.1.tgz",
"integrity": "sha512-Nbjs2zjj0htNhzgiy5wu+3w09YetDx5pkrpI/kZotDlDUaYk0HVA5xrBVPdow4SAUIlhgKcJeJg4GRKW6xHusA==",
"version": "1.56.1",
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.56.1.tgz",
"integrity": "sha512-hutraynyn31F+Bifme+Ps9Vq59hKuUCz7H1kDOcBs+2oGguKkWTU50bBWrtz34OUWmIwpBTWDxaRPXrIXkgvmQ==",
"dev": true,
"license": "Apache-2.0",
"bin": {
@@ -7852,6 +7718,13 @@
"dev": true,
"license": "MIT"
},
"node_modules/scule": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/scule/-/scule-1.3.0.tgz",
"integrity": "sha512-6FtHJEvt+pVMIB9IBY+IcCJ6Z5f1iQnytgyfKMhDKgmzYG+TeH/wx1y3l27rshSbLiSanrR9ffZDrEsmjlQF2g==",
"dev": true,
"license": "MIT"
},
"node_modules/secure-compare": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/secure-compare/-/secure-compare-3.0.1.tgz",
@@ -8052,26 +7925,26 @@
"license": "MIT"
},
"node_modules/storybook": {
"version": "9.0.17",
"resolved": "https://registry.npmjs.org/storybook/-/storybook-9.0.17.tgz",
"integrity": "sha512-O+9jgJ+Trlq9VGD1uY4OBLKQWHHDKM/A/pA8vMW6PVehhGHNvpzcIC1bngr6mL5gGHZP2nBv+9XG8pTMcggMmg==",
"version": "10.0.7",
"resolved": "https://registry.npmjs.org/storybook/-/storybook-10.0.7.tgz",
"integrity": "sha512-7smAu0o+kdm378Q2uIddk32pn0UdIbrtTVU+rXRVtTVTCrK/P2cCui2y4JH+Bl3NgEq1bbBQpCAF/HKrDjk2Qw==",
"dev": true,
"license": "MIT",
"dependencies": {
"@storybook/global": "^5.0.0",
"@storybook/icons": "^1.6.0",
"@testing-library/jest-dom": "^6.6.3",
"@testing-library/user-event": "^14.6.1",
"@vitest/expect": "3.2.4",
"@vitest/mocker": "3.2.4",
"@vitest/spy": "3.2.4",
"better-opn": "^3.0.2",
"esbuild": "^0.18.0 || ^0.19.0 || ^0.20.0 || ^0.21.0 || ^0.22.0 || ^0.23.0 || ^0.24.0 || ^0.25.0",
"esbuild-register": "^3.5.0",
"recast": "^0.23.5",
"semver": "^7.6.2",
"ws": "^8.18.0"
},
"bin": {
"storybook": "bin/index.cjs"
"storybook": "dist/bin/dispatcher.js"
},
"funding": {
"type": "opencollective",
@@ -8418,14 +8291,14 @@
}
},
"node_modules/svelte2tsx": {
"version": "0.7.41",
"resolved": "https://registry.npmjs.org/svelte2tsx/-/svelte2tsx-0.7.41.tgz",
"integrity": "sha512-/TUwpyn/Qc1wcGuayf2GSwvZ7htdAOzpo0JFFm96srKnRXoTD0gy4n06g+XgH8w016S3lPtyFVtFAm+0yJ0BZw==",
"version": "0.7.45",
"resolved": "https://registry.npmjs.org/svelte2tsx/-/svelte2tsx-0.7.45.tgz",
"integrity": "sha512-cSci+mYGygYBHIZLHlm/jYlEc1acjAHqaQaDFHdEBpUueM9kSTnPpvPtSl5VkJOU1qSJ7h1K+6F/LIUYiqC8VA==",
"dev": true,
"license": "MIT",
"dependencies": {
"dedent-js": "^1.0.1",
"pascal-case": "^3.1.1"
"scule": "^1.3.0"
},
"peerDependencies": {
"svelte": "^3.55 || ^4.0.0-next.0 || ^4.0 || ^5.0.0-next.0",
@@ -8535,14 +8408,14 @@
"license": "MIT"
},
"node_modules/tinyglobby": {
"version": "0.2.14",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz",
"integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==",
"version": "0.2.15",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
"integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"fdir": "^6.4.4",
"picomatch": "^4.0.2"
"fdir": "^6.5.0",
"picomatch": "^4.0.3"
},
"engines": {
"node": ">=12.0.0"
@@ -8918,17 +8791,19 @@
}
},
"node_modules/unplugin": {
"version": "1.16.1",
"resolved": "https://registry.npmjs.org/unplugin/-/unplugin-1.16.1.tgz",
"integrity": "sha512-4/u/j4FrCKdi17jaxuJA0jClGxB1AvU2hw/IuayPc4ay1XGaJs/rbb4v5WKwAjNifjmXK9PIFyuPiaK8azyR9w==",
"version": "2.3.10",
"resolved": "https://registry.npmjs.org/unplugin/-/unplugin-2.3.10.tgz",
"integrity": "sha512-6NCPkv1ClwH+/BGE9QeoTIl09nuiAt0gS28nn1PvYXsGKRwM2TCbFA2QiilmehPDTXIe684k4rZI1yl3A1PCUw==",
"dev": true,
"license": "MIT",
"dependencies": {
"acorn": "^8.14.0",
"@jridgewell/remapping": "^2.3.5",
"acorn": "^8.15.0",
"picomatch": "^4.0.3",
"webpack-virtual-modules": "^0.6.2"
},
"engines": {
"node": ">=14.0.0"
"node": ">=18.12.0"
}
},
"node_modules/uri-js": {
@@ -9054,18 +8929,18 @@
}
},
"node_modules/vite": {
"version": "7.0.5",
"resolved": "https://registry.npmjs.org/vite/-/vite-7.0.5.tgz",
"integrity": "sha512-1mncVwJxy2C9ThLwz0+2GKZyEXuC3MyWtAAlNftlZZXZDP3AJt5FmwcMit/IGGaNZ8ZOB2BNO/HFUB+CpN0NQw==",
"version": "7.2.2",
"resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz",
"integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"esbuild": "^0.25.0",
"fdir": "^6.4.6",
"picomatch": "^4.0.2",
"fdir": "^6.5.0",
"picomatch": "^4.0.3",
"postcss": "^8.5.6",
"rollup": "^4.40.0",
"tinyglobby": "^0.2.14"
"rollup": "^4.43.0",
"tinyglobby": "^0.2.15"
},
"bin": {
"vite": "bin/vite.js"

View File

@@ -24,20 +24,20 @@
"cleanup": "rm -rf .svelte-kit build node_modules test-results"
},
"devDependencies": {
"@chromatic-com/storybook": "^4.0.1",
"@chromatic-com/storybook": "^4.1.2",
"@eslint/compat": "^1.2.5",
"@eslint/js": "^9.18.0",
"@internationalized/date": "^3.8.2",
"@lucide/svelte": "^0.515.0",
"@playwright/test": "^1.49.1",
"@storybook/addon-a11y": "^9.0.17",
"@storybook/addon-docs": "^9.0.17",
"@storybook/addon-svelte-csf": "^5.0.7",
"@storybook/addon-vitest": "^9.0.17",
"@storybook/sveltekit": "^9.0.17",
"@sveltejs/adapter-static": "^3.0.8",
"@sveltejs/kit": "^2.22.0",
"@sveltejs/vite-plugin-svelte": "^6.0.0",
"@storybook/addon-a11y": "^10.0.7",
"@storybook/addon-docs": "^10.0.7",
"@storybook/addon-svelte-csf": "^5.0.10",
"@storybook/addon-vitest": "^10.0.7",
"@storybook/sveltekit": "^10.0.7",
"@sveltejs/adapter-static": "^3.0.10",
"@sveltejs/kit": "^2.48.4",
"@sveltejs/vite-plugin-svelte": "^6.2.1",
"@tailwindcss/forms": "^0.5.9",
"@tailwindcss/typography": "^0.5.15",
"@tailwindcss/vite": "^4.0.0",
@@ -48,21 +48,21 @@
"dexie": "^4.0.11",
"eslint": "^9.18.0",
"eslint-config-prettier": "^10.0.1",
"eslint-plugin-storybook": "^9.0.17",
"eslint-plugin-storybook": "^10.0.7",
"eslint-plugin-svelte": "^3.0.0",
"fflate": "^0.8.2",
"globals": "^16.0.0",
"http-server": "^14.1.1",
"mdast": "^3.0.0",
"mdsvex": "^0.12.3",
"playwright": "^1.53.0",
"playwright": "^1.56.1",
"prettier": "^3.4.2",
"prettier-plugin-svelte": "^3.3.3",
"prettier-plugin-tailwindcss": "^0.6.11",
"rehype-katex": "^7.0.1",
"remark-math": "^6.0.0",
"sass": "^1.93.3",
"storybook": "^9.0.17",
"storybook": "^10.0.7",
"svelte": "^5.0.0",
"svelte-check": "^4.0.0",
"tailwind-merge": "^3.3.1",
@@ -73,7 +73,7 @@
"typescript-eslint": "^8.20.0",
"unified": "^11.0.5",
"uuid": "^13.0.0",
"vite": "^7.0.4",
"vite": "^7.2.2",
"vite-plugin-devtools-json": "^0.2.0",
"vitest": "^3.2.3",
"vitest-browser-svelte": "^0.1.0"

View File

@@ -1,7 +1,7 @@
<script module lang="ts">
import { defineMeta } from '@storybook/addon-svelte-csf';
import ChatForm from '$lib/components/app/chat/ChatForm/ChatForm.svelte';
import { expect } from 'storybook/internal/test';
import { expect } from 'storybook/test';
import { mockServerProps, mockConfigs } from './fixtures/storybook-mocks';
import jpgAsset from './fixtures/assets/1.jpg?url';
import svgAsset from './fixtures/assets/hf-logo.svg?url';

View File

@@ -1,7 +1,7 @@
<script module lang="ts">
import { defineMeta } from '@storybook/addon-svelte-csf';
import ChatSidebar from '$lib/components/app/chat/ChatSidebar/ChatSidebar.svelte';
import { waitFor } from 'storybook/internal/test';
import { waitFor } from 'storybook/test';
import { screen } from 'storybook/test';
const { Story } = defineMeta({

View File

@@ -1,5 +1,6 @@
<script module lang="ts">
import { defineMeta } from '@storybook/addon-svelte-csf';
import { expect } from 'storybook/test';
import { MarkdownContent } from '$lib/components/app';
import { AI_TUTORIAL_MD } from './fixtures/ai-tutorial.js';
import { API_DOCS_MD } from './fixtures/api-docs.js';
@@ -68,64 +69,62 @@ All links should have \`target="_blank"\` and \`rel="noopener noreferrer"\` attr
class: 'max-w-[56rem] w-[calc(100vw-2rem)]'
}}
play={async ({ canvasElement }) => {
const { expect } = await import('storybook/internal/test');
// Wait for component to render
await new Promise(resolve => setTimeout(resolve, 100));
await new Promise((resolve) => setTimeout(resolve, 100));
// Find all links in the rendered content
const links = canvasElement.querySelectorAll('a[href]');
// Test that we have the expected number of links
expect(links.length).toBeGreaterThan(0);
// Test each link for proper attributes
links.forEach((link) => {
const href = link.getAttribute('href');
// Test that external links have proper security attributes
if (href && (href.startsWith('http://') || href.startsWith('https://'))) {
expect(link.getAttribute('target')).toBe('_blank');
expect(link.getAttribute('rel')).toBe('noopener noreferrer');
}
});
// Test specific links exist
const hugginFaceLink = Array.from(links).find(link =>
link.getAttribute('href') === 'https://huggingface.co'
const hugginFaceLink = Array.from(links).find(
(link) => link.getAttribute('href') === 'https://huggingface.co'
);
expect(hugginFaceLink).toBeTruthy();
expect(hugginFaceLink?.textContent).toBe('Hugging Face Homepage');
const githubLink = Array.from(links).find(link =>
link.getAttribute('href') === 'https://github.com/ggml-org/llama.cpp'
const githubLink = Array.from(links).find(
(link) => link.getAttribute('href') === 'https://github.com/ggml-org/llama.cpp'
);
expect(githubLink).toBeTruthy();
expect(githubLink?.textContent).toBe('GitHub Repository');
const openaiLink = Array.from(links).find(link =>
link.getAttribute('href') === 'https://openai.com'
const openaiLink = Array.from(links).find(
(link) => link.getAttribute('href') === 'https://openai.com'
);
expect(openaiLink).toBeTruthy();
expect(openaiLink?.textContent).toBe('OpenAI Website');
const googleLink = Array.from(links).find(link =>
link.getAttribute('href') === 'https://www.google.com'
const googleLink = Array.from(links).find(
(link) => link.getAttribute('href') === 'https://www.google.com'
);
expect(googleLink).toBeTruthy();
expect(googleLink?.textContent).toBe('Google Search');
// Test inline links (auto-linked URLs)
const exampleLink = Array.from(links).find(link =>
link.getAttribute('href') === 'https://example.com'
const exampleLink = Array.from(links).find(
(link) => link.getAttribute('href') === 'https://example.com'
);
expect(exampleLink).toBeTruthy();
const pythonDocsLink = Array.from(links).find(link =>
link.getAttribute('href') === 'https://docs.python.org'
const pythonDocsLink = Array.from(links).find(
(link) => link.getAttribute('href') === 'https://docs.python.org'
);
expect(pythonDocsLink).toBeTruthy();
console.log(`✅ URL Links test passed - Found ${links.length} links with proper attributes`);
}}
/>