handle models that need to be downloaded before estimation

2026-05-13 12:34:05 +00:00 · 2026-04-20 14:48:55 +02:00
parent 1a8aec0afd
commit b1623a614c
5 changed files with 151 additions and 9 deletions
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -83,6 +83,11 @@ int main(int argc, char ** argv) {
        return 1;
    }

+    if (params.download_only) {
+        LOG_INF("%s: model downloaded successfully, exiting\n", __func__);
+        return 0;
+    }
+
    // validate batch size for embeddings
    // embeddings require all tokens to be processed in a single ubatch
    // see https://github.com/ggml-org/llama.cpp/issues/12836