From e480e383fdbdec5e71d4a2de64b69a055cc2505c Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 10 Feb 2026 20:50:47 +0200 Subject: [PATCH] wip --- ggml/src/ggml-cuda/ggml-cuda.cu | 2 ++ src/models/qwen3next.cpp | 3 +++ 2 files changed, 5 insertions(+) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index b163468789..f3d8317e1a 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -4542,6 +4542,8 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g case GGML_UNARY_OP_CEIL: case GGML_UNARY_OP_ROUND: case GGML_UNARY_OP_TRUNC: + // TODO: should become: + //return ggml_is_contiguous_rows(op->src[0]); return ggml_is_contiguous(op->src[0]); default: return false; diff --git a/src/models/qwen3next.cpp b/src/models/qwen3next.cpp index d2571cffc5..2fc71a6151 100644 --- a/src/models/qwen3next.cpp +++ b/src/models/qwen3next.cpp @@ -504,6 +504,9 @@ ggml_tensor * llm_build_qwen3next::build_layer_attn( cb(Qcur, "Qcur", il); cb(gate, "gate", il); + // TODO: the CUDA backend does not support non-contiguous unary ops yet; remove this ggml_cont_2d once it does + gate = ggml_cont_2d(ctx0, gate, n_embd_head * n_head, n_tokens); + gate = ggml_sigmoid(ctx0, gate); cb(gate, "gate_sigmoid", il);