graph : make the compute graph constant with respect to active samplers

2026-05-13 04:24:17 +00:00 · 2025-12-10 15:54:33 +02:00
parent 0ecee8be37
commit c02654eb7d
3 changed files with 48 additions and 36 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1241,7 +1241,10 @@ static void copy_tensor_async_ints(

    for (const auto & [seq_id, tensor] : tensor_map) {
        auto it = seq_to_row.find(seq_id);
-        GGML_ASSERT(it != seq_to_row.end());
+        if (it == seq_to_row.end()) {
+            continue;
+        }
+
        const uint32_t row = it->second;
        GGML_ASSERT(row < sampled_size);

@@ -1265,7 +1268,10 @@ static void copy_tensor_async_floats(

    for (const auto & [seq_id, tensor] : tensor_map) {
        auto it = seq_to_row.find(seq_id);
-        GGML_ASSERT(it != seq_to_row.end());
+        if (it == seq_to_row.end()) {
+            continue;
+        }
+
        const uint32_t row = it->second;
        GGML_ASSERT(row < counts.size());

@@ -1293,7 +1299,10 @@ static void copy_tensor_async_candidates(

    for (const auto & [seq_id, tensor] : tensor_map) {
        auto it = seq_to_row.find(seq_id);
-        GGML_ASSERT(it != seq_to_row.end());
+        if (it == seq_to_row.end()) {
+            continue;
+        }
+
        const uint32_t row = it->second;
        GGML_ASSERT(row < counts.size());