From bf76ac77bed3f53a6d35968019ee34078bebf701 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?= Date: Tue, 5 May 2026 09:23:50 +0200 Subject: [PATCH] common : only load backends when required (#22290) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * common : only load backends when required Signed-off-by: Adrien Gallouët * llama : call ggml_backend_load_all() directly from llama_backend_init() Signed-off-by: Adrien Gallouët * Add ggml_backend_load_all() where llama_backend_init() is not used Signed-off-by: Adrien Gallouët --------- Signed-off-by: Adrien Gallouët --- common/arg.cpp | 8 +++++--- examples/save-load-state/save-load-state.cpp | 3 +++ src/llama.cpp | 4 ++++ tests/test-state-restore-fragmented.cpp | 3 +++ tools/mtmd/debug/mtmd-debug.cpp | 2 ++ tools/mtmd/mtmd-cli.cpp | 2 ++ 6 files changed, 19 insertions(+), 3 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index bd1a745e6a..a5951a7dfe 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -248,6 +248,8 @@ std::vector common_arg::get_env() const { // Helper function to parse tensor buffer override strings static void parse_tensor_buffer_overrides(const std::string & value, std::vector & overrides) { + ggml_backend_load_all(); + std::map buft_list; for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { auto * dev = ggml_backend_dev_get(i); @@ -803,6 +805,7 @@ static std::vector parse_device_list(const std::string & val if (dev_names.size() == 1 && dev_names[0] == "none") { devices.push_back(nullptr); } else { + ggml_backend_load_all(); for (const auto & device : dev_names) { auto * dev = ggml_backend_dev_by_name(device.c_str()); if (!dev || ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU) { @@ -820,6 +823,7 @@ static void add_rpc_devices(const std::string & servers) { if (rpc_servers.empty()) { throw std::invalid_argument("no RPC servers specified"); } + ggml_backend_load_all(); ggml_backend_reg_t rpc_reg = 
ggml_backend_reg_by_name("RPC"); if (!rpc_reg) { throw std::invalid_argument("failed to find RPC backend"); @@ -1016,9 +1020,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.use_color = tty_can_use_colors(); - // load dynamic backends - ggml_backend_load_all(); - common_params_context ctx_arg(params); ctx_arg.print_usage = print_usage; ctx_arg.ex = ex; @@ -2275,6 +2276,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex {"--list-devices"}, "print list of available devices and exit", [](common_params &) { + ggml_backend_load_all(); std::vector devices; for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { auto * dev = ggml_backend_dev_get(i); diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp index 3a8d0b384c..e6f5e9802a 100644 --- a/examples/save-load-state/save-load-state.cpp +++ b/examples/save-load-state/save-load-state.cpp @@ -41,6 +41,9 @@ int main(int argc, char ** argv) { std::string result3; // init + + ggml_backend_load_all(); + auto llama_init = common_init_from_params(params); auto * model = llama_init->model(); diff --git a/src/llama.cpp b/src/llama.cpp index 97529d8b75..b5dd8433c4 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -89,6 +89,10 @@ void llama_backend_init(void) { struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } + + if (!ggml_backend_reg_count()) { + ggml_backend_load_all(); + } } void llama_numa_init(enum ggml_numa_strategy numa) { diff --git a/tests/test-state-restore-fragmented.cpp b/tests/test-state-restore-fragmented.cpp index 8a9bfaf5dc..d5548afba1 100644 --- a/tests/test-state-restore-fragmented.cpp +++ b/tests/test-state-restore-fragmented.cpp @@ -29,6 +29,9 @@ int main(int argc, char ** argv) { } // init + + ggml_backend_load_all(); + common_init_result_ptr llama_init = common_init_from_params(params); llama_model * model = llama_init->model(); diff --git 
a/tools/mtmd/debug/mtmd-debug.cpp b/tools/mtmd/debug/mtmd-debug.cpp index 1e41ef793b..f19ca4cfe2 100644 --- a/tools/mtmd/debug/mtmd-debug.cpp +++ b/tools/mtmd/debug/mtmd-debug.cpp @@ -68,6 +68,8 @@ int main(int argc, char ** argv) { return 1; } + ggml_backend_load_all(); + LOG_INF("%s: loading model: %s\n", __func__, params.model.path.c_str()); mtmd::context_ptr ctx_mtmd; diff --git a/tools/mtmd/mtmd-cli.cpp b/tools/mtmd/mtmd-cli.cpp index be958bd175..d6e551618e 100644 --- a/tools/mtmd/mtmd-cli.cpp +++ b/tools/mtmd/mtmd-cli.cpp @@ -295,6 +295,8 @@ int main(int argc, char ** argv) { return 1; } + ggml_backend_load_all(); + mtmd_cli_context ctx(params); LOG_INF("%s: loading model: %s\n", __func__, params.model.path.c_str());