spec : discard last drafted token with low prob (#22506)

2026-05-14 21:14:10 +00:00 · 2026-04-29 17:00:00 +03:00
parent b1d5f5b449
commit 683c5acb90
2 changed files with 7 additions and 7 deletions
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -354,6 +354,7 @@ struct server_slot {

                // generate a new draft
                spec_draft = common_speculative_draft(spec.get(), params_spec, tokens, sampled);
+                n_draft_total += spec_draft.size();

                if (spec_draft.size() > (size_t) n_draft_max) {
                    SLT_WRN(*this, "draft size %d exceeds max %d, truncating\n", (int) spec_draft.size(), n_draft_max);
@@ -3019,7 +3020,6 @@ private:

                // update how many tokens out of those tested were accepted
                slot.n_draft_accepted += ids.size() - 1;
-                slot.n_draft_total += n_draft;

                // add accepted tokens to the prompt
                slot.prompt.tokens.keep_first(slot.prompt.n_tokens() - n_draft);