graph : remove redundant scale_w parameter (#20235)
@@ -1,7 +1,5 @@
 #include "models.h"
 
-
-
 llm_build_bert::llm_build_bert(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
     const int64_t n_embd_head = hparams.n_embd_head_v;
     const int64_t n_embd_gqa  = hparams.n_embd_v_gqa();
@@ -129,9 +127,17 @@ llm_build_bert::llm_build_bert(const llama_model & model, const llm_graph_params
         // feed-forward network
         if (hparams.moe_every_n_layers > 0 && il % hparams.moe_every_n_layers == 1) {
             // MoE branch
-            cur = build_moe_ffn(cur, model.layers[il].ffn_gate_inp, model.layers[il].ffn_up_exps, nullptr,
-                    model.layers[il].ffn_down_exps, nullptr, hparams.n_expert, hparams.n_expert_used,
-                    LLM_FFN_GELU, false, false, 0.0f, LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, il);
+            cur = build_moe_ffn(cur,
+                    model.layers[il].ffn_gate_inp,
+                    model.layers[il].ffn_up_exps,
+                    nullptr,
+                    model.layers[il].ffn_down_exps,
+                    nullptr,
+                    hparams.n_expert, hparams.n_expert_used,
+                    LLM_FFN_GELU, false,
+                    hparams.expert_weights_scale,
+                    LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX,
+                    il);
             cb(cur, "ffn_moe_out", il);
         } else if (model.arch == LLM_ARCH_BERT || model.arch == LLM_ARCH_NOMIC_BERT_MOE ||
                    model.arch == LLM_ARCH_JINA_BERT_V3) {
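Not part of the commit, just a hedged illustration: the hunk above collapses the (scale_w, w_scale) argument pair into a single w_scale value at this call site. The sketch below is a minimal, self-contained C++ example of that API pattern; the helper names and the assumption that a 0.0f scale means "leave the expert weights unscaled" are mine, not taken from the llama.cpp implementation of build_moe_ffn.

// Illustrative sketch only (not llama.cpp code). Assumption: once the
// redundant bool is removed, a scale of 0.0f is treated as "do not scale".
#include <cstdio>
#include <vector>

// Old-style helper: an explicit flag decides whether the scale is applied.
static void scale_expert_weights_old(std::vector<float> & weights, bool scale_w, float w_scale) {
    if (!scale_w) {
        return;
    }
    for (float & w : weights) {
        w *= w_scale;
    }
}

// New-style helper: the flag is gone, a zero scale simply means "no scaling".
static void scale_expert_weights_new(std::vector<float> & weights, float w_scale) {
    if (w_scale == 0.0f) {
        return;
    }
    for (float & w : weights) {
        w *= w_scale;
    }
}

int main() {
    std::vector<float> a = {0.25f, 0.75f};
    std::vector<float> b = a;

    // Mirrors the bert.cpp call site: the old call passed scale_w=false and
    // w_scale=0.0f, the new call passes only hparams.expert_weights_scale.
    scale_expert_weights_old(a, /*scale_w=*/false, /*w_scale=*/0.0f);
    scale_expert_weights_new(b, /*w_scale=*/0.0f);

    std::printf("old: %.2f %.2f\n", a[0], a[1]);
    std::printf("new: %.2f %.2f\n", b[0], b[1]);
    return 0;
}

Collapsing the flag into the value removes the redundant state where scale_w is false yet a scale value must still be supplied, which is presumably why the parameter could be dropped across all call sites.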