mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-14 04:54:06 +00:00
wip
This commit is contained in:
@@ -4542,6 +4542,8 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
||||
case GGML_UNARY_OP_CEIL:
|
||||
case GGML_UNARY_OP_ROUND:
|
||||
case GGML_UNARY_OP_TRUNC:
|
||||
// TODO: once the CUDA unary ops support non-contiguous inputs, this check should become:
|
||||
//return ggml_is_contiguous_rows(op->src[0]);
|
||||
return ggml_is_contiguous(op->src[0]);
|
||||
default:
|
||||
return false;
|
||||
|
||||
@@ -504,6 +504,9 @@ ggml_tensor * llm_build_qwen3next::build_layer_attn(
|
||||
cb(Qcur, "Qcur", il);
|
||||
cb(gate, "gate", il);
|
||||
|
||||
// TODO: CUDA does not yet implement non-contiguous unary ops; once it does, remove this ggml_cont_2d call
|
||||
gate = ggml_cont_2d(ctx0, gate, n_embd_head * n_head, n_tokens);
|
||||
|
||||
gate = ggml_sigmoid(ctx0, gate);
|
||||
cb(gate, "gate_sigmoid", il);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user