Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2026-05-13 04:24:17 +00:00)
@@ -10702,7 +10702,7 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) {
|
||||
// each move requires 6*n_layer tensors (see build_defrag)
|
||||
// - source view, destination view, copy operation
|
||||
// - x2 for keys and values
|
||||
-    const uint32_t max_moves = LLAMA_MAX_NODES/(6*n_layer);
|
||||
+    const uint32_t max_moves = (LLAMA_MAX_NODES - 2*n_layer)/(6*n_layer);
|
||||
|
||||
// determine which KV cells to move where
|
||||
//
|
||||
|
||||
Reference in New Issue
Block a user