server: add --models-memory-max parameter to allow dynamically unloading models when they exceed a memory size threshold

This commit is contained in:
Ruben Ortlam
2026-03-29 10:00:49 +02:00
parent 82209efb7e
commit 8e8e200726
5 changed files with 76 additions and 25 deletions

View File

@@ -3615,6 +3615,7 @@ void server_routes::init_routes() {
{ "total_slots", params.n_parallel },
{ "model_alias", meta->model_name },
{ "model_path", meta->model_path },
{ "memory_mb", meta->model_size / (1024 * 1024) },
{ "modalities", json {
{"vision", meta->has_inp_image},
{"audio", meta->has_inp_audio},