From a0101225bc60908572ed8b4dfd2ee1a61c69b689 Mon Sep 17 00:00:00 2001
From: fl0rianr
Date: Wed, 6 May 2026 17:03:45 +0200
Subject: [PATCH] common: do not fit to unknown device memory (#22614)

* common: do not fit to unknown device memory

Signed-off-by: Florian Reinle

* common: preserve host fallback for non-GPU fit devices

Signed-off-by: Florian Reinle

* common: keep unknown GPU fit memory at zero

Signed-off-by: Florian Reinle

---------

Signed-off-by: Florian Reinle
---
 common/fit.cpp | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/common/fit.cpp b/common/fit.cpp
index aca3f4d407..e66982df5b 100644
--- a/common/fit.cpp
+++ b/common/fit.cpp
@@ -109,16 +109,24 @@ static std::vector common_get_device_memory_data(
         ret.back().total = total;
     }
     for (size_t i = 0; i < nd; i++) {
+        ggml_backend_dev_t dev = llama_model_get_device(model, i);
+
         size_t free;
         size_t total;
-        ggml_backend_dev_memory(llama_model_get_device(model, i), &free, &total);
+        ggml_backend_dev_memory(dev, &free, &total);
 
-        // devices can return 0 bytes for free and total memory if they do not
-        // have any to report. in this case, we will use the host memory as a fallback
-        // fixes: https://github.com/ggml-org/llama.cpp/issues/18577
+        // Some non-GPU accelerator backends, such as BLAS, report 0/0 and rely on
+        // the host-memory fallback. For GPU-like backends, keep 0/0 so --fit does
+        // not assign anything to a device with an unknown memory budget.
         if (free == 0 && total == 0) {
-            free = ret.back().free;
-            total = ret.back().total;
+            const enum ggml_backend_dev_type type = ggml_backend_dev_type(dev);
+            if (type == GGML_BACKEND_DEVICE_TYPE_GPU || type == GGML_BACKEND_DEVICE_TYPE_IGPU) {
+                LOG_WRN("%s: device %s did not report memory; --fit will not use it\n",
+                        __func__, ggml_backend_dev_name(dev));
+            } else {
+                free = ret.back().free;
+                total = ret.back().total;
+            }
         }
         ret[i].free = free;
         ret[i].total = total;