spec : support parallel drafts

This commit is contained in:
Georgi Gerganov
2026-05-08 18:53:33 +03:00
parent 927d6635d3
commit f88c942861
4 changed files with 196 additions and 103 deletions

View File

@@ -336,14 +336,16 @@ struct server_slot {
}
// generate a new draft
auto dparams = common_speculative_draft_params {
/* .n_max = */ n_draft_max,
/* .n_past = */ prompt.n_tokens(),
/* .id_last = */ sampled,
/* .prompt = */ tokens_text,
/* .result = */ spec_draft,
common_speculative_draft_params_map dparams;
dparams[this->id] = common_speculative_draft_params {
/* .drafting = */ true,
/* .n_max = */ n_draft_max,
/* .n_past = */ prompt.n_tokens(),
/* .id_last = */ sampled,
/* .prompt = */ &tokens_text,
/* .result = */ &spec_draft,
};
common_speculative_draft(spec, this->id, dparams);
common_speculative_draft(spec, dparams);
n_draft_total += spec_draft.size();
if (spec_draft.size() > (size_t) n_draft_max) {