wip

2026-05-14 04:54:06 +00:00 · 2026-02-10 20:50:47 +02:00
parent 1c312dc758
commit e480e383fd
2 changed files with 5 additions and 0 deletions
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -4542,6 +4542,8 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
                case GGML_UNARY_OP_CEIL:
                case GGML_UNARY_OP_ROUND:
                case GGML_UNARY_OP_TRUNC:
+                    // TODO: once non-contiguous unary ops are implemented for CUDA, this check should become:
+                    //return ggml_is_contiguous_rows(op->src[0]);
                    return ggml_is_contiguous(op->src[0]);
                default:
                    return false;
--- a/src/models/qwen3next.cpp
+++ b/src/models/qwen3next.cpp
@@ -504,6 +504,9 @@ ggml_tensor * llm_build_qwen3next::build_layer_attn(
    cb(Qcur, "Qcur", il);
    cb(gate, "gate", il);

+    // TODO: the CUDA backend does not support non-contiguous unary ops yet; remove this ggml_cont_2d once it does
+    gate = ggml_cont_2d(ctx0, gate, n_embd_head * n_head, n_tokens);
+
    gate = ggml_sigmoid(ctx0, gate);
    cb(gate, "gate_sigmoid", il);