From ddaafa3dc187c91bb0395229924cb7013758fc71 Mon Sep 17 00:00:00 2001
From: Ruben Ortlam
Date: Thu, 9 Apr 2026 11:11:17 +0200
Subject: [PATCH] state

---
 ggml/src/ggml-vulkan/ggml-vulkan.cpp | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 65ea1b3361..417d92ba5b 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -13903,16 +13903,23 @@ static vk_peer_copy_buf * ggml_vk_get_peer_copy_buf(vk_device& src_dev, vk_devic
     bool use_sync_fd = src_dev->external_semaphore_sync_fd && dst_dev->external_semaphore_sync_fd;
     staging.use_sync_fd = use_sync_fd;
 
+    GGML_LOG_DEBUG("ggml_vulkan: peer_staging init: src=%s dst=%s use_sync_fd=%d\n",
+        src_dev->name.c_str(), dst_dev->name.c_str(), (int)use_sync_fd);
+
     if (use_sync_fd) {
         vk::ExportSemaphoreCreateInfo export_ci{ vk::ExternalSemaphoreHandleTypeFlagBits::eSyncFd };
         vk::SemaphoreCreateInfo sci{};
         sci.setPNext(&export_ci);
         staging.src_sem = src_dev->device.createSemaphore(sci);
+        GGML_LOG_DEBUG("ggml_vulkan: created exportable src_sem=%p on %s\n",
+            (void *)(VkSemaphore)staging.src_sem, src_dev->name.c_str());
     } else {
         vk::SemaphoreTypeCreateInfo tci{ vk::SemaphoreType::eTimeline, 0 };
         vk::SemaphoreCreateInfo sci{};
         sci.setPNext(&tci);
         staging.tl_sem = src_dev->device.createSemaphore(sci);
+        GGML_LOG_DEBUG("ggml_vulkan: created timeline tl_sem=%p on %s\n",
+            (void *)(VkSemaphore)staging.tl_sem, src_dev->name.c_str());
     }
 }
 
@@ -13971,12 +13978,18 @@ static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend_src, ggml_ba
     size_t src_offset = vk_tensor_offset(src) + src->view_offs;
     size_t dst_offset = vk_tensor_offset(dst) + dst->view_offs;
 
+    GGML_LOG_DEBUG("ggml_vulkan: cross-device copy %s -> %s, nbytes=%zu\n",
+        src_dev->name.c_str(), dst_dev->name.c_str(), nbytes);
+
     vk_peer_copy_buf * copy_buf = ggml_vk_get_peer_copy_buf(src_dev, dst_dev, nbytes);
     if (!copy_buf) {
+        GGML_LOG_DEBUG("ggml_vulkan: cross-device copy: failed to get peer copy buf\n");
         return false;
     }
 
     auto& staging = src_dev->peer_staging[dst_dev.get()];
+    GGML_LOG_DEBUG("ggml_vulkan: cross-device copy: buf_idx=%zu/%zu use_sync_fd=%d\n",
+        staging.buf_idx, staging.bufs.size(), (int)staging.use_sync_fd);
 
     // HOP 1: src VRAM → staging (on source compute queue)
     // Implicit queue submission ordering guarantees this
@@ -14012,9 +14025,20 @@ static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend_src, ggml_ba
         };
         int sync_fd = src_dev->device.getSemaphoreFdKHR(get_fd_info);
 
+        GGML_LOG_DEBUG("ggml_vulkan: cross-device sync_fd: src_dev=%s dst_dev=%s "
+            "nbytes=%zu sync_fd=%d src_sem=%p dst_compute_ctx=%p\n",
+            src_dev->name.c_str(), dst_dev->name.c_str(),
+            nbytes, sync_fd, (void *)(VkSemaphore)staging.src_sem,
+            (void *)dst_compute_ctx.get());
+
         // Per-copy dest semaphore (temporary import is consumed per wait)
         vk_semaphore * dst_sem = ggml_vk_create_binary_semaphore(ctx);
 
+        GGML_LOG_DEBUG("ggml_vulkan: cross-device importing sync_fd=%d into dst_sem=%p "
+            "(binary_semaphore_idx=%zu)\n",
+            sync_fd, (void *)(VkSemaphore)dst_sem->s,
+            ctx->binary_semaphore_idx);
+
         vk::ImportSemaphoreFdInfoKHR import_info{
             dst_sem->s,
             vk::SemaphoreImportFlagBits::eTemporary,
@@ -14023,6 +14047,8 @@ static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend_src, ggml_ba
         };
         dst_dev->device.importSemaphoreFdKHR(import_info);
 
+        GGML_LOG_DEBUG("ggml_vulkan: cross-device import succeeded\n");
+
         dst_compute_ctx->s->wait_semaphores.push_back({ dst_sem->s, 0 });
     } else {
         // Tier 2: timeline semaphore with CPU wait