mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-14 13:04:08 +00:00
common: ngram map, config self-speculative decoding
This commit is contained in:
@@ -234,10 +234,11 @@ task_params server_task::params_from_json_cmpl(
|
||||
params.sampling.backend_sampling = json_value(data, "backend_sampling", defaults.sampling.backend_sampling);
|
||||
params.post_sampling_probs = json_value(data, "post_sampling_probs", defaults.post_sampling_probs);
|
||||
|
||||
params.speculative.n_min = json_value(data, "speculative.n_min", defaults.speculative.n_min);
|
||||
params.speculative.n_max = json_value(data, "speculative.n_max", defaults.speculative.n_max);
|
||||
params.speculative.p_min = json_value(data, "speculative.p_min", defaults.speculative.p_min);
|
||||
params.speculative.use_self = json_value(data, "speculative.use_self", defaults.speculative.use_self);
|
||||
params.speculative.n_min = json_value(data, "speculative.n_min", defaults.speculative.n_min);
|
||||
params.speculative.n_max = json_value(data, "speculative.n_max", defaults.speculative.n_max);
|
||||
params.speculative.p_min = json_value(data, "speculative.p_min", defaults.speculative.p_min);
|
||||
params.speculative.self_mode = json_value(data, "speculative.self_mode", defaults.speculative.self_mode);
|
||||
params.speculative.self_cfg = json_value(data, "speculative.self_cfg", defaults.speculative.self_cfg);
|
||||
|
||||
params.speculative.n_min = std::min(params.speculative.n_max, params.speculative.n_min);
|
||||
params.speculative.n_min = std::max(params.speculative.n_min, 0);
|
||||
|
||||
Reference in New Issue
Block a user