common : only load backends when required (#22290)

* common : only load backends when required

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* llama : call ggml_backend_load_all() directly from llama_backend_init()

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Add ggml_backend_load_all() where llama_backend_init() is not used

Signed-off-by: Adrien Gallouët <angt@huggingface.co>

---------

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
Adrien Gallouët
2026-05-05 09:23:50 +02:00
committed by GitHub
parent a09a00e502
commit bf76ac77be
6 changed files with 19 additions and 3 deletions
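
The net effect, sketched below, is that backend loading becomes lazy: llama_backend_init() only falls back to ggml_backend_load_all() when the registry is still empty, and tools that never call llama_backend_init() must load backends explicitly. A minimal sketch of the two paths, assuming a build with dynamic backends; the shared-library path is hypothetical, not part of this commit:

    #include "llama.h"
    #include "ggml-backend.h"

    int main(void) {
        // Default path: with this commit, llama_backend_init() calls
        // ggml_backend_load_all() itself when no backend is registered yet,
        // so most programs need no explicit load call anymore.
        llama_backend_init();

        // Alternative path (sketch): pre-registering a single backend keeps
        // llama_backend_init() from scanning for all of them, because
        // ggml_backend_reg_count() is already non-zero.
        //   ggml_backend_load("./libggml-cuda.so"); // hypothetical path
        //   llama_backend_init();                   // no full scan

        llama_backend_free();
        return 0;
    }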

View File

@@ -248,6 +248,8 @@ std::vector<std::string> common_arg::get_env() const {
 // Helper function to parse tensor buffer override strings
 static void parse_tensor_buffer_overrides(const std::string & value, std::vector<llama_model_tensor_buft_override> & overrides) {
+    ggml_backend_load_all();
+
     std::map<std::string, ggml_backend_buffer_type_t> buft_list;
     for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
         auto * dev = ggml_backend_dev_get(i);
@@ -803,6 +805,7 @@ static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & val
     if (dev_names.size() == 1 && dev_names[0] == "none") {
         devices.push_back(nullptr);
     } else {
+        ggml_backend_load_all();
         for (const auto & device : dev_names) {
             auto * dev = ggml_backend_dev_by_name(device.c_str());
             if (!dev || ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU) {
@@ -820,6 +823,7 @@ static void add_rpc_devices(const std::string & servers) {
     if (rpc_servers.empty()) {
         throw std::invalid_argument("no RPC servers specified");
     }
+    ggml_backend_load_all();
     ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
     if (!rpc_reg) {
         throw std::invalid_argument("failed to find RPC backend");
@@ -1016,9 +1020,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     params.use_color = tty_can_use_colors();
 
-    // load dynamic backends
-    ggml_backend_load_all();
-
     common_params_context ctx_arg(params);
     ctx_arg.print_usage = print_usage;
     ctx_arg.ex = ex;
@@ -2275,6 +2276,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
{"--list-devices"},
"print list of available devices and exit",
[](common_params &) {
ggml_backend_load_all();
std::vector<ggml_backend_dev_t> devices;
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
auto * dev = ggml_backend_dev_get(i);
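
Each of these call sites now loads backends on first use instead of unconditionally at parser construction. A minimal sketch of the pattern, using an illustrative wrapper that is not part of the patch; the patch itself calls ggml_backend_load_all() from several handlers that can fire in one run, so repeated calls are evidently expected to be harmless:

    // Illustrative helper (not from the patch): defer backend loading to
    // the first call site that actually needs to enumerate devices.
    static ggml_backend_dev_t find_device_lazy(const char * name) {
        ggml_backend_load_all(); // load dynamic backends on demand
        return ggml_backend_dev_by_name(name);
    }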

View File

@@ -41,6 +41,9 @@ int main(int argc, char ** argv) {
     std::string result3;
 
     // init
+    ggml_backend_load_all();
+
     auto llama_init = common_init_from_params(params);
     auto * model = llama_init->model();

View File

@@ -89,6 +89,10 @@ void llama_backend_init(void) {
         struct ggml_context * ctx = ggml_init(params);
         ggml_free(ctx);
     }
+
+    if (!ggml_backend_reg_count()) {
+        ggml_backend_load_all();
+    }
 }
 
 void llama_numa_init(enum ggml_numa_strategy numa) {
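
The guard keys off ggml_backend_reg_count(): if at least one backend is already registered when llama_backend_init() runs, the registry is left untouched. A hedged sketch of what this enables, assuming a dynamic-backend build; the library path is hypothetical:

    // Sketch: load exactly one backend up front, then initialize llama.
    ggml_backend_reg_t reg = ggml_backend_load("./libggml-metal.so"); // hypothetical path
    if (reg != NULL) {
        llama_backend_init(); // ggml_backend_reg_count() > 0, so no ggml_backend_load_all()
    }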

View File

@@ -29,6 +29,9 @@ int main(int argc, char ** argv) {
     }
 
     // init
+    ggml_backend_load_all();
+
     common_init_result_ptr llama_init = common_init_from_params(params);
     llama_model * model = llama_init->model();

View File

@@ -68,6 +68,8 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    ggml_backend_load_all();
+
     LOG_INF("%s: loading model: %s\n", __func__, params.model.path.c_str());
 
     mtmd::context_ptr ctx_mtmd;

View File

@@ -295,6 +295,8 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    ggml_backend_load_all();
+
     mtmd_cli_context ctx(params);
 
     LOG_INF("%s: loading model: %s\n", __func__, params.model.path.c_str());