From ddaafa3dc187c91bb0395229924cb7013758fc71 Mon Sep 17 00:00:00 2001
From: Ruben Ortlam
Date: Thu, 9 Apr 2026 11:11:17 +0200
Subject: [PATCH] state

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 65ea1b3361..417d92ba5b 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -13903,16 +13903,23 @@ static vk_peer_copy_buf * ggml_vk_get_peer_copy_buf(vk_device& src_dev, vk_devic
     bool use_sync_fd = src_dev->external_semaphore_sync_fd && dst_dev->external_semaphore_sync_fd;
     staging.use_sync_fd = use_sync_fd;
 
+    GGML_LOG_DEBUG("ggml_vulkan: peer_staging init: src=%s dst=%s use_sync_fd=%d\n",
+        src_dev->name.c_str(), dst_dev->name.c_str(), (int)use_sync_fd);
+
     if (use_sync_fd) {
         vk::ExportSemaphoreCreateInfo export_ci{ vk::ExternalSemaphoreHandleTypeFlagBits::eSyncFd };
         vk::SemaphoreCreateInfo sci{};
         sci.setPNext(&export_ci);
         staging.src_sem = src_dev->device.createSemaphore(sci);
+        GGML_LOG_DEBUG("ggml_vulkan: created exportable src_sem=%p on %s\n",
+            (void *)(VkSemaphore)staging.src_sem, src_dev->name.c_str());
     } else {
         vk::SemaphoreTypeCreateInfo tci{ vk::SemaphoreType::eTimeline, 0 };
         vk::SemaphoreCreateInfo sci{};
         sci.setPNext(&tci);
         staging.tl_sem = src_dev->device.createSemaphore(sci);
+        GGML_LOG_DEBUG("ggml_vulkan: created timeline tl_sem=%p on %s\n",
+            (void *)(VkSemaphore)staging.tl_sem, src_dev->name.c_str());
     }
 }
 
@@ -13971,12 +13978,18 @@ static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend_src, ggml_ba
     size_t src_offset = vk_tensor_offset(src) + src->view_offs;
     size_t dst_offset = vk_tensor_offset(dst) + dst->view_offs;
 
+    GGML_LOG_DEBUG("ggml_vulkan: cross-device copy %s -> %s, nbytes=%zu\n",
+        src_dev->name.c_str(), dst_dev->name.c_str(), nbytes);
+
     vk_peer_copy_buf * copy_buf = ggml_vk_get_peer_copy_buf(src_dev, dst_dev, nbytes);
     if (!copy_buf) {
+        GGML_LOG_DEBUG("ggml_vulkan: cross-device copy: failed to get peer copy buf\n");
         return false;
     }
 
     auto& staging = src_dev->peer_staging[dst_dev.get()];
+    GGML_LOG_DEBUG("ggml_vulkan: cross-device copy: buf_idx=%zu/%zu use_sync_fd=%d\n",
+        staging.buf_idx, staging.bufs.size(), (int)staging.use_sync_fd);
 
     // HOP 1: src VRAM → staging (on source compute queue)
     // Implicit queue submission ordering guarantees this
@@ -14012,9 +14025,20 @@ static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend_src, ggml_ba
         };
         int sync_fd = src_dev->device.getSemaphoreFdKHR(get_fd_info);
 
+        GGML_LOG_DEBUG("ggml_vulkan: cross-device sync_fd: src_dev=%s dst_dev=%s "
+            "nbytes=%zu sync_fd=%d src_sem=%p dst_compute_ctx=%p\n",
+            src_dev->name.c_str(), dst_dev->name.c_str(),
+            nbytes, sync_fd, (void *)(VkSemaphore)staging.src_sem,
+            (void *)dst_compute_ctx.get());
+
         // Per-copy dest semaphore (temporary import is consumed per wait)
         vk_semaphore * dst_sem = ggml_vk_create_binary_semaphore(ctx);
 
+        GGML_LOG_DEBUG("ggml_vulkan: cross-device importing sync_fd=%d into dst_sem=%p "
+            "(binary_semaphore_idx=%zu)\n",
+            sync_fd, (void *)(VkSemaphore)dst_sem->s,
+            ctx->binary_semaphore_idx);
+
         vk::ImportSemaphoreFdInfoKHR import_info{
             dst_sem->s,
             vk::SemaphoreImportFlagBits::eTemporary,
@@ -14023,6 +14047,8 @@ static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend_src, ggml_ba
         };
         dst_dev->device.importSemaphoreFdKHR(import_info);
 
+        GGML_LOG_DEBUG("ggml_vulkan: cross-device import succeeded\n");
+
         dst_compute_ctx->s->wait_semaphores.push_back({ dst_sem->s, 0 });
     } else {
         // Tier 2: timeline semaphore with CPU wait