Files
llama.cpp/kompute/src/Manager.cpp
Adam Treat 0412ec287c Completely revamp how we do object management with the vulkan backend and
stop using so many static objects so we can tear down and bring up vulkan
on new devices in the same runtime.
2023-09-12 14:24:49 -04:00

514 lines
18 KiB
C++

// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include "kompute/Manager.hpp"
#include "fmt/format.h"
#include "kompute/logger/Logger.hpp"
#include <fmt/core.h>
#include <iterator>
#include <set>
#include <sstream>
#include <string>
namespace kp {
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
static VKAPI_ATTR VkBool32 VKAPI_CALL
debugMessageCallback(VkDebugReportFlagsEXT /*flags*/,
VkDebugReportObjectTypeEXT /*objectType*/,
uint64_t /*object*/,
size_t /*location*/,
int32_t /*messageCode*/,
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_DEBUG
const char* pLayerPrefix,
const char* pMessage,
#else
const char* /*pLayerPrefix*/,
const char* /*pMessage*/,
#endif
void* /*pUserData*/)
{
KP_LOG_DEBUG("[VALIDATION]: {} - {}", pLayerPrefix, pMessage);
return VK_FALSE;
}
#endif
Manager::Manager()
{
this->mManageResources = true;
// Make sure the logger is setup
#if !KOMPUTE_OPT_LOG_LEVEL_DISABLED
logger::setupLogger();
#endif
this->createInstance();
}
void Manager::initializeDevice(uint32_t physicalDeviceIndex,
const std::vector<uint32_t>& familyQueueIndices,
const std::vector<std::string>& desiredExtensions)
{
this->createDevice(
familyQueueIndices, physicalDeviceIndex, desiredExtensions);
}
Manager::~Manager()
{
KP_LOG_DEBUG("Kompute Manager Destructor started");
this->destroy();
}
void
Manager::destroy()
{
KP_LOG_DEBUG("Kompute Manager destroy() started");
if (this->mDevice == nullptr) {
KP_LOG_ERROR(
"Kompute Manager destructor reached with null Device pointer");
return;
}
if (this->mManageResources && this->mManagedSequences.size()) {
KP_LOG_DEBUG("Kompute Manager explicitly running destructor for "
"managed sequences");
for (const std::weak_ptr<Sequence>& weakSq : this->mManagedSequences) {
if (std::shared_ptr<Sequence> sq = weakSq.lock()) {
sq->destroy();
}
}
this->mManagedSequences.clear();
}
if (this->mManageResources && !this->mManagedAlgorithmsMap.empty()) {
KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms");
for (const auto& kv : this->mManagedAlgorithmsMap) {
if (std::shared_ptr<Algorithm> algorithm = kv.second) {
algorithm->destroy();
}
}
this->mManagedAlgorithmsMap.clear();
}
if (this->mManageResources && this->mManagedTensors.size()) {
KP_LOG_DEBUG("Kompute Manager explicitly freeing tensors");
for (const std::weak_ptr<Tensor>& weakTensor : this->mManagedTensors) {
if (std::shared_ptr<Tensor> tensor = weakTensor.lock()) {
tensor->destroy();
}
}
this->mManagedTensors.clear();
}
if (this->mPipelineCache) {
KP_LOG_DEBUG("Kompute Manager Destroying pipeline cache");
if (!this->mPipelineCache) {
KP_LOG_WARN("Kompute Manager Error requested to destroy "
"pipeline cache but it is null");
}
this->mDevice->destroy(
*this->mPipelineCache,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mPipelineCache = nullptr;
}
if (this->mFreeDevice) {
KP_LOG_INFO("Destroying device");
this->mDevice->destroy(
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mDevice = nullptr;
KP_LOG_DEBUG("Kompute Manager Destroyed Device");
}
if (this->mInstance == nullptr) {
KP_LOG_ERROR(
"Kompute Manager destructor reached with null Instance pointer");
return;
}
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
if (this->mDebugReportCallback) {
this->mInstance->destroyDebugReportCallbackEXT(
this->mDebugReportCallback, nullptr, this->mDebugDispatcher);
KP_LOG_DEBUG("Kompute Manager Destroyed Debug Report Callback");
}
#endif
if (this->mFreeInstance) {
this->mInstance->destroy(
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mInstance = nullptr;
KP_LOG_DEBUG("Kompute Manager Destroyed Instance");
}
}
void
Manager::createInstance()
{
KP_LOG_DEBUG("Kompute Manager creating instance");
this->mFreeInstance = true;
vk::ApplicationInfo applicationInfo;
applicationInfo.pApplicationName = "Kompute";
applicationInfo.pEngineName = "Kompute";
applicationInfo.apiVersion = KOMPUTE_VK_API_VERSION;
applicationInfo.engineVersion = KOMPUTE_VK_API_VERSION;
applicationInfo.applicationVersion = KOMPUTE_VK_API_VERSION;
std::vector<const char*> applicationExtensions;
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
applicationExtensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
#endif
vk::InstanceCreateInfo computeInstanceCreateInfo;
computeInstanceCreateInfo.pApplicationInfo = &applicationInfo;
if (!applicationExtensions.empty()) {
computeInstanceCreateInfo.enabledExtensionCount =
(uint32_t)applicationExtensions.size();
computeInstanceCreateInfo.ppEnabledExtensionNames =
applicationExtensions.data();
}
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
KP_LOG_DEBUG("Kompute Manager adding debug validation layers");
// We'll identify the layers that are supported
std::vector<const char*> validLayerNames;
std::vector<const char*> desiredLayerNames = {
"VK_LAYER_LUNARG_assistant_layer",
"VK_LAYER_LUNARG_standard_validation",
"VK_LAYER_KHRONOS_validation",
};
std::vector<std::string> envLayerNames;
const char* envLayerNamesVal = std::getenv("KOMPUTE_ENV_DEBUG_LAYERS");
if (envLayerNamesVal != nullptr && *envLayerNamesVal != '\0') {
KP_LOG_DEBUG("Kompute Manager adding environment layers: {}",
envLayerNamesVal);
std::istringstream iss(envLayerNamesVal);
std::istream_iterator<std::string> beg(iss);
std::istream_iterator<std::string> end;
envLayerNames = std::vector<std::string>(beg, end);
for (const std::string& layerName : envLayerNames) {
desiredLayerNames.push_back(layerName.c_str());
}
KP_LOG_DEBUG("Desired layers: {}", fmt::join(desiredLayerNames, ", "));
}
// Identify the valid layer names based on the desiredLayerNames
{
std::set<std::string> uniqueLayerNames;
std::vector<vk::LayerProperties> availableLayerProperties =
vk::enumerateInstanceLayerProperties();
for (vk::LayerProperties layerProperties : availableLayerProperties) {
std::string layerName(layerProperties.layerName.data());
uniqueLayerNames.insert(layerName);
}
KP_LOG_DEBUG("Available layers: {}", fmt::join(uniqueLayerNames, ", "));
for (const char* desiredLayerName : desiredLayerNames) {
if (uniqueLayerNames.count(desiredLayerName) != 0) {
validLayerNames.push_back(desiredLayerName);
}
}
}
if (!validLayerNames.empty()) {
KP_LOG_DEBUG(
"Kompute Manager Initializing instance with valid layers: {}",
fmt::join(validLayerNames, ", "));
computeInstanceCreateInfo.enabledLayerCount =
static_cast<uint32_t>(validLayerNames.size());
computeInstanceCreateInfo.ppEnabledLayerNames = validLayerNames.data();
} else {
KP_LOG_WARN("Kompute Manager no valid layer names found from desired "
"layer names");
}
#endif
try {
mDynamicLoader = std::make_shared<vk::DynamicLoader>();
} catch (const std::exception & err) {
return;
}
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr =
mDynamicLoader->getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
this->mInstance = std::make_shared<vk::Instance>();
vk::createInstance(
&computeInstanceCreateInfo, nullptr, this->mInstance.get());
VULKAN_HPP_DEFAULT_DISPATCHER.init(*this->mInstance);
KP_LOG_DEBUG("Kompute Manager Instance Created");
#ifndef KOMPUTE_DISABLE_VK_DEBUG_LAYERS
KP_LOG_DEBUG("Kompute Manager adding debug callbacks");
if (validLayerNames.size() > 0) {
vk::DebugReportFlagsEXT debugFlags =
vk::DebugReportFlagBitsEXT::eError |
vk::DebugReportFlagBitsEXT::eWarning;
vk::DebugReportCallbackCreateInfoEXT debugCreateInfo = {};
debugCreateInfo.pfnCallback =
(PFN_vkDebugReportCallbackEXT)debugMessageCallback;
debugCreateInfo.flags = debugFlags;
this->mDebugDispatcher.init(*this->mInstance, &vkGetInstanceProcAddr);
this->mDebugReportCallback =
this->mInstance->createDebugReportCallbackEXT(
debugCreateInfo, nullptr, this->mDebugDispatcher);
}
#endif
}
void
Manager::clear()
{
if (this->mManageResources) {
this->mManagedTensors.erase(
std::remove_if(begin(this->mManagedTensors),
end(this->mManagedTensors),
[](std::weak_ptr<Tensor> t) { return t.expired(); }),
end(this->mManagedTensors));
for (auto it = this->mManagedAlgorithmsMap.begin();
it != this->mManagedAlgorithmsMap.end();) {
if (it->second) {
it = this->mManagedAlgorithmsMap.erase(it);
} else {
++it;
}
}
this->mManagedSequences.erase(
std::remove_if(begin(this->mManagedSequences),
end(this->mManagedSequences),
[](std::weak_ptr<Sequence> t) { return t.expired(); }),
end(this->mManagedSequences));
}
}
void
Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
uint32_t physicalDeviceIndex,
const std::vector<std::string>& desiredExtensions)
{
KP_LOG_DEBUG("Kompute Manager creating Device");
if (this->mInstance == nullptr) {
throw std::runtime_error("Kompute Manager instance is null");
}
this->mFreeDevice = true;
// Getting an integer that says how many vuklan devices we have
std::vector<vk::PhysicalDevice> physicalDevices =
this->mInstance->enumeratePhysicalDevices();
uint32_t deviceCount = physicalDevices.size();
// This means there are no devices at all
if (deviceCount == 0) {
throw std::runtime_error("Failed to find GPUs with Vulkan support! "
"Maybe you haven't installed vulkan drivers?");
}
// This means that we're exceeding our device limit, for
// example if we have 2 devices, just physicalDeviceIndex
// 0 and 1 are acceptable. Hence, physicalDeviceIndex should
// always be less than deviceCount, else we raise an error
if (!(deviceCount > physicalDeviceIndex)) {
throw std::runtime_error("There is no such physical index or device, "
"please use your existing device");
}
vk::PhysicalDevice physicalDevice = physicalDevices[physicalDeviceIndex];
this->mPhysicalDevice =
std::make_shared<vk::PhysicalDevice>(physicalDevice);
#if KOMPUTE_OPT_ACTIVE_LOG_LEVEL <= KOMPUTE_LOG_LEVEL_INFO
vk::PhysicalDeviceProperties physicalDeviceProperties =
physicalDevice.getProperties();
#endif
KP_LOG_INFO("Using physical device index {} found {}",
physicalDeviceIndex,
physicalDeviceProperties.deviceName);
if (familyQueueIndices.empty()) {
// Find compute queue
std::vector<vk::QueueFamilyProperties> allQueueFamilyProperties =
physicalDevice.getQueueFamilyProperties();
uint32_t computeQueueFamilyIndex = 0;
bool computeQueueSupported = false;
for (uint32_t i = 0; i < allQueueFamilyProperties.size(); i++) {
vk::QueueFamilyProperties queueFamilyProperties =
allQueueFamilyProperties[i];
if (queueFamilyProperties.queueFlags &
vk::QueueFlagBits::eCompute) {
computeQueueFamilyIndex = i;
computeQueueSupported = true;
break;
}
}
if (!computeQueueSupported) {
throw std::runtime_error("Compute queue is not supported");
}
this->mComputeQueueFamilyIndices.push_back(computeQueueFamilyIndex);
} else {
this->mComputeQueueFamilyIndices = familyQueueIndices;
}
std::unordered_map<uint32_t, uint32_t> familyQueueCounts;
std::unordered_map<uint32_t, std::vector<float>> familyQueuePriorities;
for (const auto& value : this->mComputeQueueFamilyIndices) {
familyQueueCounts[value]++;
familyQueuePriorities[value].push_back(1.0f);
}
std::unordered_map<uint32_t, uint32_t> familyQueueIndexCount;
std::vector<vk::DeviceQueueCreateInfo> deviceQueueCreateInfos;
for (const auto& familyQueueInfo : familyQueueCounts) {
// Setting the device count to 0
familyQueueIndexCount[familyQueueInfo.first] = 0;
// Creating the respective device queue
vk::DeviceQueueCreateInfo deviceQueueCreateInfo(
vk::DeviceQueueCreateFlags(),
familyQueueInfo.first,
familyQueueInfo.second,
familyQueuePriorities[familyQueueInfo.first].data());
deviceQueueCreateInfos.push_back(deviceQueueCreateInfo);
}
KP_LOG_DEBUG("Kompute Manager desired extension layers {}",
fmt::join(desiredExtensions, ", "));
std::vector<vk::ExtensionProperties> deviceExtensions =
this->mPhysicalDevice->enumerateDeviceExtensionProperties();
std::set<std::string> uniqueExtensionNames;
for (const vk::ExtensionProperties& ext : deviceExtensions) {
uniqueExtensionNames.insert(ext.extensionName);
}
KP_LOG_DEBUG("Kompute Manager available extensions {}",
fmt::join(uniqueExtensionNames, ", "));
std::vector<const char*> validExtensions;
for (const std::string& ext : desiredExtensions) {
if (uniqueExtensionNames.count(ext) != 0) {
validExtensions.push_back(ext.c_str());
}
}
if (desiredExtensions.size() != validExtensions.size()) {
KP_LOG_ERROR("Kompute Manager not all extensions were added: {}",
fmt::join(validExtensions, ", "));
}
vk::PhysicalDeviceFeatures features;
features.shaderInt16 = true;
vk::PhysicalDeviceVulkan11Features features11;
features11.uniformAndStorageBuffer16BitAccess = true;
features11.storageBuffer16BitAccess = true;
features11.pNext = nullptr;
vk::PhysicalDeviceVulkan12Features features12;
features12.storageBuffer8BitAccess = true;
features12.uniformAndStorageBuffer8BitAccess = true;
features12.shaderFloat16 = true;
features12.shaderInt8 = true;
features12.pNext = &features11;
vk::DeviceCreateInfo deviceCreateInfo(vk::DeviceCreateFlags(),
deviceQueueCreateInfos.size(),
deviceQueueCreateInfos.data(),
{},
{},
validExtensions.size(),
validExtensions.data(),
&features);
deviceCreateInfo.pNext = &features12;
this->mDevice = std::make_shared<vk::Device>();
vk::Result r = physicalDevice.createDevice(
&deviceCreateInfo, nullptr, this->mDevice.get());
if (r != vk::Result::eSuccess) {
KP_LOG_ERROR("Kompute Manager could not create device");
}
KP_LOG_DEBUG("Kompute Manager device created");
for (const uint32_t& familyQueueIndex : this->mComputeQueueFamilyIndices) {
std::shared_ptr<vk::Queue> currQueue = std::make_shared<vk::Queue>();
this->mDevice->getQueue(familyQueueIndex,
familyQueueIndexCount[familyQueueIndex],
currQueue.get());
familyQueueIndexCount[familyQueueIndex]++;
this->mComputeQueues.push_back(currQueue);
}
KP_LOG_DEBUG("Kompute Manager compute queue obtained");
mPipelineCache = std::make_shared<vk::PipelineCache>();
vk::PipelineCacheCreateInfo pipelineCacheInfo =
vk::PipelineCacheCreateInfo();
this->mDevice->createPipelineCache(
&pipelineCacheInfo, nullptr, mPipelineCache.get());
}
std::shared_ptr<Sequence>
Manager::sequence(uint32_t queueIndex, uint32_t totalTimestamps)
{
KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex);
std::shared_ptr<Sequence> sq{ new kp::Sequence(
this->mPhysicalDevice,
this->mDevice,
this->mComputeQueues[queueIndex],
this->mComputeQueueFamilyIndices[queueIndex],
totalTimestamps) };
if (this->mManageResources) {
this->mManagedSequences.push_back(sq);
}
return sq;
}
vk::PhysicalDeviceProperties
Manager::getDeviceProperties() const
{
return this->mPhysicalDevice->getProperties();
}
std::vector<vk::PhysicalDevice>
Manager::listDevices() const
{
return this->mInstance->enumeratePhysicalDevices();
}
std::shared_ptr<vk::Instance>
Manager::getVkInstance() const
{
return this->mInstance;
}
}