From c417ddfc7431ba43f47a02e2d31bce3ee3b04d48 Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Mon, 11 May 2026 12:22:37 +0800 Subject: [PATCH] speculative: size MTP batch by n_batch instead of n_ubatch --- common/speculative.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/speculative.cpp b/common/speculative.cpp index 89e89c659c..ef13edd34e 100644 --- a/common/speculative.cpp +++ b/common/speculative.cpp @@ -413,11 +413,11 @@ struct common_speculative_state_mtp : public common_speculative_impl { n_embd = llama_model_n_embd(llama_get_model(ctx_dft)); - const int32_t n_ub = (int32_t) llama_n_ubatch(ctx_dft); - batch = llama_batch_init(/*n_tokens=*/ n_ub, /*embd=*/ n_embd, /*n_seq_max=*/ 1); + const int32_t n_b = (int32_t) llama_n_batch(ctx_dft); + batch = llama_batch_init(/*n_tokens=*/ n_b, /*embd=*/ n_embd, /*n_seq_max=*/ 1); // llama_batch_init allocates only one of token/embd; MTP needs both. // TODO: fix, how to call without malloc - batch.token = (llama_token *) malloc(sizeof(llama_token) * n_ub); + batch.token = (llama_token *) malloc(sizeof(llama_token) * n_b); smpls.resize(n_seq); for (auto & s : smpls) {