diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index b163468789..f3d8317e1a 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -4542,6 +4542,8 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g case GGML_UNARY_OP_CEIL: case GGML_UNARY_OP_ROUND: case GGML_UNARY_OP_TRUNC: + // TODO: should become: + //return ggml_is_contiguous_rows(op->src[0]); return ggml_is_contiguous(op->src[0]); default: return false; diff --git a/src/models/qwen3next.cpp b/src/models/qwen3next.cpp index d2571cffc5..2fc71a6151 100644 --- a/src/models/qwen3next.cpp +++ b/src/models/qwen3next.cpp @@ -504,6 +504,9 @@ ggml_tensor * llm_build_qwen3next::build_layer_attn( cb(Qcur, "Qcur", il); cb(gate, "gate", il); + // TODO: CUDA is missing non-contiguous unary ops. when implemented: remove this cont + gate = ggml_cont_2d(ctx0, gate, n_embd_head * n_head, n_tokens); + gate = ggml_sigmoid(ctx0, gate); cb(gate, "gate_sigmoid", il);