mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-12 20:14:09 +00:00
Merge branch 'master' into HEAD
This commit is contained in:
@@ -157,11 +157,12 @@ task_params server_task::params_from_json_cmpl(
|
||||
|
||||
// Sampling parameter defaults are loaded from the global server context (but individual requests can still them)
|
||||
task_params defaults;
|
||||
defaults.sampling = params_base.sampling;
|
||||
defaults.speculative = params_base.speculative;
|
||||
defaults.n_keep = params_base.n_keep;
|
||||
defaults.n_predict = params_base.n_predict;
|
||||
defaults.antiprompt = params_base.antiprompt;
|
||||
defaults.sampling = params_base.sampling;
|
||||
defaults.speculative = params_base.speculative;
|
||||
defaults.n_keep = params_base.n_keep;
|
||||
defaults.n_predict = params_base.n_predict;
|
||||
defaults.n_cache_reuse = params_base.n_cache_reuse;
|
||||
defaults.antiprompt = params_base.antiprompt;
|
||||
|
||||
// enabling this will output extra debug information in the HTTP responses from the server
|
||||
params.verbose = params_base.verbosity > 9;
|
||||
@@ -177,6 +178,8 @@ task_params server_task::params_from_json_cmpl(
|
||||
params.n_indent = json_value(data, "n_indent", defaults.n_indent);
|
||||
params.n_keep = json_value(data, "n_keep", defaults.n_keep);
|
||||
params.n_discard = json_value(data, "n_discard", defaults.n_discard);
|
||||
params.n_cmpl = json_value(data, "n_cmpl", json_value(data, "n", 1));
|
||||
params.n_cache_reuse = json_value(data, "n_cache_reuse", defaults.n_cache_reuse);
|
||||
//params.t_max_prompt_ms = json_value(data, "t_max_prompt_ms", defaults.t_max_prompt_ms); // TODO: implement
|
||||
params.t_max_predict_ms = json_value(data, "t_max_predict_ms", defaults.t_max_predict_ms);
|
||||
params.response_fields = json_value(data, "response_fields", std::vector<std::string>());
|
||||
@@ -458,6 +461,10 @@ task_params server_task::params_from_json_cmpl(
|
||||
}
|
||||
}
|
||||
|
||||
if (params.n_cmpl > params_base.n_parallel) {
|
||||
throw std::runtime_error("n_cmpl cannot be greater than the number of slots, please increase -np");
|
||||
}
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
@@ -669,7 +676,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat() {
|
||||
|
||||
json choice {
|
||||
{"finish_reason", finish_reason},
|
||||
{"index", 0},
|
||||
{"index", index},
|
||||
{"message", msg.to_json_oaicompat<json>()},
|
||||
};
|
||||
|
||||
@@ -1069,7 +1076,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() {
|
||||
{"choices", json::array({
|
||||
json {
|
||||
{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"index", index},
|
||||
{"delta", delta},
|
||||
},
|
||||
})},
|
||||
|
||||
Reference in New Issue
Block a user