mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-08 01:54:10 +00:00
graph : handle non-contiguous Q/K/V in mul_mat_aux (#22630)
* qkv may not always be contiguous
* cont : make the cont conditional

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
@@ -65,7 +65,11 @@ static ggml_tensor * ggml_mul_mat_aux(
|
||||
|
||||
ggml_tensor * res;
|
||||
|
||||
res = ggml_reshape_2d(ctx, cur, n, ggml_nelements(cur)/n);
|
||||
if (!ggml_is_contiguous(cur)) {
|
||||
res = ggml_cont_2d (ctx, cur, n, ggml_nelements(cur)/n);
|
||||
} else {
|
||||
res = ggml_reshape_2d(ctx, cur, n, ggml_nelements(cur)/n);
|
||||
}
|
||||
res = ggml_mul_mat (ctx, rot, res);
|
||||
ggml_mul_mat_set_hint(res, GGML_HINT_SRC0_IS_HADAMARD);
|
||||
res = ggml_reshape_4d(ctx, res, cur->ne[0], cur->ne[1], cur->ne[2], cur->ne[3]);
|
||||
|
||||
Reference in New Issue
Block a user