mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-10 02:54:06 +00:00
Nomic vulkan backend licensed under the Software for Open Models License (SOM), version 1.0.
This commit is contained in:
450
kompute/src/Algorithm.cpp
Normal file
450
kompute/src/Algorithm.cpp
Normal file
@@ -0,0 +1,450 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
/**
|
||||
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
|
||||
*
|
||||
* This software is licensed under the terms of the Software for Open Models License (SOM),
|
||||
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
|
||||
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "kompute/Algorithm.hpp"
|
||||
|
||||
namespace kp {
|
||||
|
||||
// Destructor: logs that teardown has started and hands the actual cleanup
// over to destroy().
Algorithm::~Algorithm()
{
    KP_LOG_DEBUG("Kompute Algorithm Destructor started");

    destroy();
}
|
||||
|
||||
bool
|
||||
Algorithm::isInit()
|
||||
{
|
||||
return this->mPipeline && this->mPipelineCache && this->mPipelineLayout &&
|
||||
this->mDescriptorPool && this->mDescriptorSet &&
|
||||
this->mDescriptorSetLayout && this->mShaderModule;
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::destroy()
|
||||
{
|
||||
// We don't have to free memory on destroy as it's freed by the
|
||||
// commandBuffer destructor if (this->mPushConstantsData) {
|
||||
// free(this->mPushConstantsData);
|
||||
// }
|
||||
// if (this->mSpecializationConstantsData) {
|
||||
// free(this->mSpecializationConstantsData);
|
||||
// }
|
||||
|
||||
if (!this->mDevice) {
|
||||
KP_LOG_WARN("Kompute Algorithm destroy function reached with null "
|
||||
"Device pointer");
|
||||
return;
|
||||
}
|
||||
|
||||
if (this->mFreePipeline && this->mPipeline) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline");
|
||||
if (!this->mPipeline) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mPipeline,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPipeline = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreePipelineCache && this->mPipelineCache) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline cache");
|
||||
if (!this->mPipelineCache) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline cache but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mPipelineCache,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPipelineCache = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreePipelineLayout && this->mPipelineLayout) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
|
||||
if (!this->mPipelineLayout) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"pipeline layout but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mPipelineLayout,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mPipelineLayout = nullptr;
|
||||
}
|
||||
|
||||
if (this->mFreeShaderModule && this->mShaderModule) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying shader module");
|
||||
if (!this->mShaderModule) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy shader "
|
||||
"module but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mShaderModule,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mShaderModule = nullptr;
|
||||
}
|
||||
|
||||
freeParameters();
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::freeParameters()
|
||||
{
|
||||
if (this->mFreeDescriptorSetLayout && this->mDescriptorSetLayout) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout");
|
||||
if (!this->mDescriptorSetLayout) {
|
||||
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
|
||||
"descriptor set layout but it is null");
|
||||
}
|
||||
this->mDevice->destroy(
|
||||
*this->mDescriptorSetLayout,
|
||||
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
|
||||
this->mDescriptorSetLayout = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::createParameters()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm createParameters started");
|
||||
if (!*this->mDescriptorPool) {
|
||||
KP_LOG_ERROR("Kompute Algorithm can not create descriptor pool");
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings;
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
descriptorSetBindings.push_back(
|
||||
vk::DescriptorSetLayoutBinding(i, // Binding index
|
||||
vk::DescriptorType::eStorageBuffer,
|
||||
1, // Descriptor count
|
||||
vk::ShaderStageFlagBits::eCompute));
|
||||
}
|
||||
|
||||
// This is the component that is fed into the pipeline
|
||||
vk::DescriptorSetLayoutCreateInfo descriptorSetLayoutInfo(
|
||||
vk::DescriptorSetLayoutCreateFlags(),
|
||||
static_cast<uint32_t>(descriptorSetBindings.size()),
|
||||
descriptorSetBindings.data());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm creating descriptor set layout");
|
||||
this->mDescriptorSetLayout = std::make_shared<vk::DescriptorSetLayout>();
|
||||
vk::Result result = this->mDevice->createDescriptorSetLayout(
|
||||
&descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get());
|
||||
|
||||
if (result != vk::Result::eSuccess) {
|
||||
KP_LOG_ERROR("Failed to create descriptor set layout. Error code: {}", vk::to_string(result));
|
||||
} else {
|
||||
this->mFreeDescriptorSetLayout = true;
|
||||
KP_LOG_DEBUG("Successfully allocated descriptor set layout.");
|
||||
}
|
||||
|
||||
vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
|
||||
*this->mDescriptorPool,
|
||||
1, // Descriptor set layout count
|
||||
this->mDescriptorSetLayout.get());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets");
|
||||
this->mDescriptorSet = std::make_shared<vk::DescriptorSet>();
|
||||
result = this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo,
|
||||
this->mDescriptorSet.get());
|
||||
|
||||
if (result != vk::Result::eSuccess) {
|
||||
KP_LOG_ERROR("Failed to allocate descriptor sets. Error code: {}", vk::to_string(result));
|
||||
} else {
|
||||
this->mFreeDescriptorSet = true;
|
||||
KP_LOG_DEBUG("Successfully allocated descriptor sets.");
|
||||
}
|
||||
|
||||
this->mFreeDescriptorSet = true;
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
|
||||
|
||||
vk::DescriptorBufferInfo descriptorBufferInfo =
|
||||
this->mTensors[i]->constructDescriptorBufferInfo();
|
||||
|
||||
computeWriteDescriptorSets.push_back(
|
||||
vk::WriteDescriptorSet(*this->mDescriptorSet,
|
||||
i, // Destination binding
|
||||
0, // Destination array element
|
||||
1, // Descriptor count
|
||||
vk::DescriptorType::eStorageBuffer,
|
||||
nullptr, // Descriptor image info
|
||||
&descriptorBufferInfo));
|
||||
|
||||
this->mDevice->updateDescriptorSets(computeWriteDescriptorSets,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm successfully run init");
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::updateParameters()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm updateParameters started");
|
||||
if (!*this->mDescriptorPool) {
|
||||
KP_LOG_ERROR("Kompute Algorithm can not create descriptor pool");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
|
||||
*this->mDescriptorPool,
|
||||
1, // Descriptor set layout count
|
||||
this->mDescriptorSetLayout.get());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets");
|
||||
this->mDescriptorSet = std::make_shared<vk::DescriptorSet>();
|
||||
vk::Result result = this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo,
|
||||
this->mDescriptorSet.get());
|
||||
|
||||
if (result != vk::Result::eSuccess) {
|
||||
KP_LOG_ERROR("Failed to allocate descriptor sets. Error code: {}", vk::to_string(result));
|
||||
} else {
|
||||
this->mFreeDescriptorSet = true;
|
||||
KP_LOG_DEBUG("Successfully allocated descriptor sets.");
|
||||
}
|
||||
|
||||
this->mFreeDescriptorSet = true;
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
|
||||
for (size_t i = 0; i < this->mTensors.size(); i++) {
|
||||
std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
|
||||
|
||||
vk::DescriptorBufferInfo descriptorBufferInfo =
|
||||
this->mTensors[i]->constructDescriptorBufferInfo();
|
||||
|
||||
computeWriteDescriptorSets.push_back(
|
||||
vk::WriteDescriptorSet(*this->mDescriptorSet,
|
||||
i, // Destination binding
|
||||
0, // Destination array element
|
||||
1, // Descriptor count
|
||||
vk::DescriptorType::eStorageBuffer,
|
||||
nullptr, // Descriptor image info
|
||||
&descriptorBufferInfo));
|
||||
|
||||
this->mDevice->updateDescriptorSets(computeWriteDescriptorSets,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm successfully run init");
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::createShaderModule()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm createShaderModule started");
|
||||
|
||||
vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(),
|
||||
sizeof(uint32_t) *
|
||||
this->mSpirv.size(),
|
||||
this->mSpirv.data());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}",
|
||||
this->mSpirv.size());
|
||||
this->mFreeShaderModule = true;
|
||||
this->mShaderModule = std::make_shared<vk::ShaderModule>();
|
||||
this->mDevice->createShaderModule(
|
||||
&shaderModuleInfo, nullptr, this->mShaderModule.get());
|
||||
this->mFreeShaderModule = true;
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm create shader module success");
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::createPipeline()
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm calling create Pipeline");
|
||||
|
||||
vk::PipelineLayoutCreateInfo pipelineLayoutInfo(
|
||||
vk::PipelineLayoutCreateFlags(),
|
||||
1, // Set layout count
|
||||
this->mDescriptorSetLayout.get());
|
||||
|
||||
vk::PushConstantRange pushConstantRange;
|
||||
if (this->mPushConstantsSize) {
|
||||
pushConstantRange.setStageFlags(vk::ShaderStageFlagBits::eCompute);
|
||||
pushConstantRange.setOffset(0);
|
||||
pushConstantRange.setSize(this->mPushConstantsDataTypeMemorySize *
|
||||
this->mPushConstantsSize);
|
||||
|
||||
pipelineLayoutInfo.setPushConstantRangeCount(1);
|
||||
pipelineLayoutInfo.setPPushConstantRanges(&pushConstantRange);
|
||||
}
|
||||
|
||||
this->mPipelineLayout = std::make_shared<vk::PipelineLayout>();
|
||||
this->mDevice->createPipelineLayout(
|
||||
&pipelineLayoutInfo, nullptr, this->mPipelineLayout.get());
|
||||
this->mFreePipelineLayout = true;
|
||||
|
||||
std::vector<vk::SpecializationMapEntry> specializationEntries;
|
||||
|
||||
for (uint32_t i = 0; i < this->mSpecializationConstantsSize; i++) {
|
||||
vk::SpecializationMapEntry specializationEntry(
|
||||
static_cast<uint32_t>(i),
|
||||
static_cast<uint32_t>(
|
||||
this->mSpecializationConstantsDataTypeMemorySize * i),
|
||||
this->mSpecializationConstantsDataTypeMemorySize);
|
||||
|
||||
specializationEntries.push_back(specializationEntry);
|
||||
}
|
||||
|
||||
// This passes ownership of the memory so we remove ownership from
|
||||
// specialization container by using "transferDataOwnership"
|
||||
vk::SpecializationInfo specializationInfo(
|
||||
static_cast<uint32_t>(specializationEntries.size()),
|
||||
specializationEntries.data(),
|
||||
this->mSpecializationConstantsDataTypeMemorySize *
|
||||
this->mSpecializationConstantsSize,
|
||||
this->mSpecializationConstantsData);
|
||||
|
||||
vk::PipelineShaderStageCreateInfo shaderStage(
|
||||
vk::PipelineShaderStageCreateFlags(),
|
||||
vk::ShaderStageFlagBits::eCompute,
|
||||
*this->mShaderModule,
|
||||
"main",
|
||||
&specializationInfo);
|
||||
|
||||
static std::shared_ptr<vk::PipelineCache> globalPipelineCache = std::make_shared<vk::PipelineCache>();
|
||||
if(!*globalPipelineCache) {
|
||||
vk::PipelineCacheCreateInfo pipelineCacheInfo =
|
||||
vk::PipelineCacheCreateInfo();
|
||||
this->mPipelineCache = globalPipelineCache;
|
||||
this->mFreePipelineCache = true;
|
||||
this->mDevice->createPipelineCache(
|
||||
&pipelineCacheInfo, nullptr, globalPipelineCache.get());
|
||||
}
|
||||
|
||||
vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(),
|
||||
shaderStage,
|
||||
*this->mPipelineLayout,
|
||||
vk::Pipeline(),
|
||||
0);
|
||||
|
||||
#ifdef KOMPUTE_CREATE_PIPELINE_RESULT_VALUE
|
||||
vk::ResultValue<vk::Pipeline> pipelineResult =
|
||||
this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo);
|
||||
|
||||
if (pipelineResult.result != vk::Result::eSuccess) {
|
||||
throw std::runtime_error("Failed to create pipeline result: " +
|
||||
vk::to_string(pipelineResult.result));
|
||||
}
|
||||
|
||||
vk::Pipeline& pipeline = pipelineResult.value;
|
||||
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
|
||||
this->mFreePipeline = true;
|
||||
#else
|
||||
vk::Pipeline pipeline =
|
||||
this->mDevice->createComputePipeline(*globalPipelineCache, pipelineInfo)
|
||||
.value;
|
||||
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
|
||||
this->mFreePipeline = true;
|
||||
#endif
|
||||
|
||||
// TODO: Update to consistent
|
||||
// this->mPipeline = std::make_shared<vk::Pipeline>();
|
||||
// this->mDevice->createComputePipelines(
|
||||
// *this->mPipelineCache, 1, &pipelineInfo, nullptr,
|
||||
// this->mPipeline.get());
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm Create Pipeline Success");
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::recordBindCore(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding pipeline");
|
||||
|
||||
commandBuffer.bindPipeline(vk::PipelineBindPoint::eCompute,
|
||||
*this->mPipeline);
|
||||
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding descriptor sets");
|
||||
|
||||
commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute,
|
||||
*this->mPipelineLayout,
|
||||
0, // First set
|
||||
*this->mDescriptorSet,
|
||||
nullptr // Dispatcher
|
||||
);
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
if (this->mPushConstantsSize) {
|
||||
KP_LOG_DEBUG("Kompute Algorithm binding push constants memory size: {}",
|
||||
this->mPushConstantsSize *
|
||||
this->mPushConstantsDataTypeMemorySize);
|
||||
|
||||
commandBuffer.pushConstants(*this->mPipelineLayout,
|
||||
vk::ShaderStageFlagBits::eCompute,
|
||||
0,
|
||||
this->mPushConstantsSize *
|
||||
this->mPushConstantsDataTypeMemorySize,
|
||||
this->mPushConstantsData);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer)
|
||||
{
|
||||
KP_LOG_DEBUG("Kompute Algorithm recording dispatch");
|
||||
|
||||
commandBuffer.dispatch(
|
||||
this->mWorkgroup[0], this->mWorkgroup[1], this->mWorkgroup[2]);
|
||||
}
|
||||
|
||||
void
|
||||
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
|
||||
{
|
||||
|
||||
KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size");
|
||||
|
||||
// The dispatch size is set up based on either explicitly provided template
|
||||
// parameters or by default it would take the shape and size of the tensors
|
||||
if (workgroup[0] > 0) {
|
||||
// If at least the x value is provided we use mainly the parameters
|
||||
// provided
|
||||
this->mWorkgroup = { workgroup[0],
|
||||
workgroup[1] > 0 ? workgroup[1] : 1,
|
||||
workgroup[2] > 0 ? workgroup[2] : 1 };
|
||||
} else {
|
||||
this->mWorkgroup = { minSize, 1, 1 };
|
||||
}
|
||||
|
||||
KP_LOG_INFO("Kompute OpAlgoCreate set dispatch size X: {}, Y: {}, Z: {}",
|
||||
this->mWorkgroup[0],
|
||||
this->mWorkgroup[1],
|
||||
this->mWorkgroup[2]);
|
||||
}
|
||||
|
||||
// Returns a reference to the stored dispatch size (mWorkgroup).
const Workgroup&
Algorithm::getWorkgroup()
{
    return mWorkgroup;
}
|
||||
|
||||
const std::vector<std::shared_ptr<Tensor>>&
|
||||
Algorithm::getTensors()
|
||||
{
|
||||
return this->mTensors;
|
||||
}
|
||||
|
||||
void Algorithm::setTensors(const std::vector<std::shared_ptr<Tensor>>& tensors)
|
||||
{
|
||||
this->mTensors = tensors;
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user