spec : support parallel drafts

2026-05-15 13:34:06 +00:00 · 2026-05-08 18:53:33 +03:00
parent 927d6635d3
commit f88c942861
4 changed files with 196 additions and 103 deletions
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -336,14 +336,16 @@ struct server_slot {
                }

                // generate a new draft
-                auto dparams = common_speculative_draft_params {
-                    /* .n_max      = */ n_draft_max,
-                    /* .n_past     = */ prompt.n_tokens(),
-                    /* .id_last    = */ sampled,
-                    /* .prompt     = */ tokens_text,
-                    /* .result     = */ spec_draft,
+                common_speculative_draft_params_map dparams;
+                dparams[this->id] = common_speculative_draft_params {
+                    /* .drafting = */ true,
+                    /* .n_max    = */ n_draft_max,
+                    /* .n_past   = */ prompt.n_tokens(),
+                    /* .id_last  = */ sampled,
+                    /* .prompt   = */ &tokens_text,
+                    /* .result   = */ &spec_draft,
                };
-                common_speculative_draft(spec, this->id, dparams);
+                common_speculative_draft(spec, dparams);
                n_draft_total += spec_draft.size();

                if (spec_draft.size() > (size_t) n_draft_max) {