From fa595462ca2d5ea0feca20c41bd1431b3d4285ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Tue, 5 May 2026 05:34:44 +0200 Subject: [PATCH] graph : handle non-contiguous Q/K/V in mul_mat_aux (#22630) * qkv may not always be contiguous * cont : make the cont conditional --------- Co-authored-by: Georgi Gerganov --- src/llama-graph.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp index c4cf0ccb26..fe155c92de 100644 --- a/src/llama-graph.cpp +++ b/src/llama-graph.cpp @@ -65,7 +65,11 @@ static ggml_tensor * ggml_mul_mat_aux( ggml_tensor * res; - res = ggml_reshape_2d(ctx, cur, n, ggml_nelements(cur)/n); + if (!ggml_is_contiguous(cur)) { + res = ggml_cont_2d (ctx, cur, n, ggml_nelements(cur)/n); + } else { + res = ggml_reshape_2d(ctx, cur, n, ggml_nelements(cur)/n); + } res = ggml_mul_mat (ctx, rot, res); ggml_mul_mat_set_hint(res, GGML_HINT_SRC0_IS_HADAMARD); res = ggml_reshape_4d(ctx, res, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3]);