Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2026-03-17 16:44:07 +00:00)
server : do not create checkpoints right after mtmd chunks (#20232)
This commit is contained in:
@@ -2438,6 +2438,8 @@ private:
|
||||
slot.n_prompt_tokens_cache = 0;
|
||||
}
|
||||
|
||||
bool do_checkpoint = params_base.n_ctx_checkpoints > 0;
|
||||
|
||||
// check if we should process the image
|
||||
if (slot.prompt.n_tokens() < slot.task->n_tokens() && input_tokens[slot.prompt.n_tokens()] == LLAMA_TOKEN_NULL) {
|
||||
// process the image
|
||||
@@ -2457,6 +2459,8 @@ private:
|
||||
const auto & chunk = input_tokens.find_chunk(slot.prompt.n_tokens());
|
||||
slot.prompt.tokens.push_back(chunk.get()); // copy
|
||||
}
|
||||
|
||||
do_checkpoint = false; // do not checkpoint right after an image chunk
|
||||
}
|
||||
|
||||
// If using an alora, there may be uncached tokens that come
|
||||
@@ -2473,8 +2477,6 @@ private:
|
||||
alora_disabled_id = enabled_loras[0];
|
||||
}
|
||||
|
||||
bool do_checkpoint = params_base.n_ctx_checkpoints > 0;
|
||||
|
||||
// make checkpoints only for completion tasks
|
||||
do_checkpoint = do_checkpoint && slot.task->type == SERVER_TASK_TYPE_COMPLETION;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user