handle models that need to be downloaded before estimation

This commit is contained in:
Ruben Ortlam
2026-04-20 14:48:55 +02:00
parent 1a8aec0afd
commit b1623a614c
5 changed files with 151 additions and 9 deletions

View File

@@ -83,6 +83,11 @@ int main(int argc, char ** argv) {
return 1;
}
if (params.download_only) {
LOG_INF("%s: model downloaded successfully, exiting\n", __func__);
return 0;
}
// validate batch size for embeddings
// embeddings require all tokens to be processed in a single ubatch
// see https://github.com/ggml-org/llama.cpp/issues/12836