Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2026-05-13 04:24:17 +00:00)
server: add --models-memory-max parameter to allow dynamically unloading models when they exceed a memory size threshold
This commit is contained in:
@@ -3615,6 +3615,7 @@ void server_routes::init_routes() {
|
||||
{ "total_slots", params.n_parallel },
|
||||
{ "model_alias", meta->model_name },
|
||||
{ "model_path", meta->model_path },
|
||||
{ "memory_mb", meta->model_size / (1024 * 1024) },
|
||||
{ "modalities", json {
|
||||
{"vision", meta->has_inp_image},
|
||||
{"audio", meta->has_inp_audio},
|
||||
|
||||
Reference in New Issue
Block a user