sampling : hide prev behind API and apply #3661

ggml-ci
2026-05-15 05:24:06 +00:00 · 2023-10-20 18:26:20 +03:00
parent 7e2b5fb1dd
commit 56ba00b923
9 changed files with 119 additions and 105 deletions
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ -330,7 +330,7 @@ int main(int argc, char ** argv) {

                const llama_token id = llama_sampling_sample(client.ctx_sampling, ctx, NULL, client.i_batch - i);

-                llama_sampling_accept(client.ctx_sampling, ctx, id);
+                llama_sampling_accept(client.ctx_sampling, ctx, id, true);

                if (client.n_decoded == 1) {
                    // start measuring generation time after the first token to make sure all concurrent clients