Compare commits (10 commits)
Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2026-05-15 21:44:05 +00:00
| SHA1 |
|---|
| 27ebfcacba |
| 5c86c9ed3e |
| efb8b47eda |
| 0527771dd8 |
| 2189fd3b63 |
| 3f96aeff39 |
| b486ba05bf |
| 02115dcd9a |
| d9c4accaff |
| 15e03282bb |
.github/workflows/build.yml (vendored, 1 changed line)
@@ -15,7 +15,6 @@ concurrency:
  cancel-in-progress: true

env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  GGML_NLOOP: 3
  GGML_N_THREADS: 1
  LLAMA_LOG_COLORS: 1
.github/workflows/release.yml (vendored, 23 changed lines)
@@ -16,11 +16,6 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

# Fine-grant permission
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
  contents: write # for creating release

env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON"
@@ -416,28 +411,27 @@ jobs:
          CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
        run: |
          cp $env:CURL_PATH\bin\libcurl-x64.dll .\build\bin\Release\libcurl-x64.dll
          7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
          7z a llama-${{ steps.tag.outputs.name }}-bin-win-cuda${{ matrix.cuda }}-x64.zip .\build\bin\Release\*

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
          name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
          path: llama-${{ steps.tag.outputs.name }}-bin-win-cuda${{ matrix.cuda }}-x64.zip
          name: llama-bin-win-cuda${{ matrix.cuda }}-x64.zip

      - name: Copy and pack Cuda runtime
        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
        run: |
          echo "Cuda install location: ${{ env.CUDA_PATH }}"
          $dst='.\build\bin\cudart\'
          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
          7z a cudart-llama-bin-win-cuda${{ matrix.cuda }}-x64.zip $dst\*

      - name: Upload Cuda runtime
        uses: actions/upload-artifact@v4
        with:
          path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
          name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
          path: cudart-llama-bin-win-cuda${{ matrix.cuda }}-x64.zip
          name: cudart-llama-bin-win-cuda${{ matrix.cuda }}-x64.zip

  windows-sycl:
    runs-on: windows-latest
@@ -646,6 +640,11 @@ jobs:
  release:
    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}

    # Fine-grant permission
    # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
    permissions:
      contents: write # for creating release

    runs-on: ubuntu-latest

    needs:
@@ -252,20 +252,3 @@ configure_file(cmake/llama.pc.in

install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)

#
# copy the license files
#

# Check if running in GitHub Actions
if(DEFINED ENV{GITHUB_ACTIONS} AND "$ENV{GITHUB_ACTIONS}" STREQUAL "true")
    message(STATUS "Running inside GitHub Actions - copying license files")

    # Copy all files from licenses/ to build/bin/
    file(GLOB LICENSE_FILES "${CMAKE_SOURCE_DIR}/licenses/*")
    foreach(LICENSE_FILE ${LICENSE_FILES})
        get_filename_component(FILENAME ${LICENSE_FILE} NAME)
        configure_file(${LICENSE_FILE} "${CMAKE_BINARY_DIR}/bin/${FILENAME}" COPYONLY)
    endforeach()
endif()
@@ -144,3 +144,27 @@ endif ()
target_include_directories(${TARGET} PUBLIC .)
target_compile_features (${TARGET} PUBLIC cxx_std_17)
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)


#
# copy the license files
#

# Check if running in GitHub Actions
if (DEFINED ENV{GITHUB_ACTIONS} AND "$ENV{GITHUB_ACTIONS}" STREQUAL "true")
    message(STATUS "Running inside GitHub Actions - copying license files")

    # Copy all files from licenses/ to build/bin/
    file(GLOB LICENSE_FILES "${CMAKE_SOURCE_DIR}/licenses/*")
    foreach(LICENSE_FILE ${LICENSE_FILES})
        get_filename_component(FILENAME ${LICENSE_FILE} NAME)
        add_custom_command(
            POST_BUILD
            TARGET ${TARGET}
            COMMAND ${CMAKE_COMMAND} -E copy_if_different
                "${LICENSE_FILE}"
                "$<TARGET_FILE_DIR:llama>/${FILENAME}"
            COMMENT "Copying ${FILENAME} to ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
        message(STATUS "Copying ${LICENSE_FILE} to ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${FILENAME}")
    endforeach()
endif()
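A note on the change in these two hunks (general CMake behavior, not something stated in the patch): configure_file(... COPYONLY) copies the license files once at configure time into ${CMAKE_BINARY_DIR}/bin, whereas add_custom_command(TARGET ... POST_BUILD ...) runs after the ${TARGET} library is built and resolves the destination through the $<TARGET_FILE_DIR:llama> generator expression, so the copies land next to the binaries that were actually produced, including with multi-config generators such as Visual Studio where the output directory is only known at build time.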
@@ -2627,6 +2627,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
            params.i_chunk = value;
        }
    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
    add_opt(common_arg(
        {"--parse-special"},
        string_format("prase special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),
        [](common_params & params) {
            params.parse_special = true;
        }
    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
    add_opt(common_arg(
        {"-pps"},
        string_format("is the prompt shared across parallel sequences (default: %s)", params.is_pp_shared ? "true" : "false"),
@@ -409,6 +409,7 @@ struct common_params {

    bool process_output = false; // collect data for the output tensor
    bool compute_ppl = true; // whether to compute perplexity
    bool parse_special = false; // whether to parse special tokens during imatrix tokenization

    // cvector-generator params
    int n_pca_batch = 100;
@@ -10,10 +10,11 @@ static __global__ void k_get_rows(
        /*const size_t nb00,*/ const size_t nb01, const size_t nb02, const size_t nb03,
        const size_t s10, const size_t s11, const size_t s12/*, const size_t s13*/) {

    const int i00 = (blockIdx.x*blockDim.x + threadIdx.x)*2;
    const int i10 = blockDim.y*blockIdx.y + threadIdx.y;
    const int i11 = (blockIdx.z*blockDim.z + threadIdx.z)/ne12;
    const int i12 = (blockIdx.z*blockDim.z + threadIdx.z)%ne12;
    // The x and y dimensions of the grid are swapped because the maximum allowed grid size for x is higher.
    const int i00 = (blockIdx.y * blockDim.x + threadIdx.x)*2;
    const int i10 = blockIdx.x;
    const int i11 = blockIdx.z / ne12;
    const int i12 = blockIdx.z % ne12;

    if (i00 >= ne00) {
        return;

@@ -46,10 +47,11 @@ static __global__ void k_get_rows_float(
        /*const size_t nb00,*/ const size_t nb01, const size_t nb02, const size_t nb03,
        const size_t s10, const size_t s11, const size_t s12/*, const size_t s13*/) {

    const int i00 = blockIdx.x*blockDim.x + threadIdx.x;
    const int i10 = blockDim.y*blockIdx.y + threadIdx.y;
    const int i11 = (blockIdx.z*blockDim.z + threadIdx.z)/ne12;
    const int i12 = (blockIdx.z*blockDim.z + threadIdx.z)%ne12;
    // The x and y dimensions of the grid are swapped because the maximum allowed grid size for x is higher.
    const int i00 = blockIdx.y * blockDim.x + threadIdx.x;
    const int i10 = blockIdx.x;
    const int i11 = blockIdx.z / ne12;
    const int i12 = blockIdx.z % ne12;

    if (i00 >= ne00) {
        return;

@@ -94,8 +96,8 @@ static void get_rows_cuda_q(
        const size_t nb1, const size_t nb2, const size_t nb3,
        cudaStream_t stream) {
    const dim3 block_dims(CUDA_GET_ROWS_BLOCK_SIZE, 1, 1);
    const int block_num_x = (ne00 + 2*CUDA_GET_ROWS_BLOCK_SIZE - 1) / (2*CUDA_GET_ROWS_BLOCK_SIZE);
    const dim3 block_nums(block_num_x, ne10, ne11*ne12);
    const int block_num_y = (ne00 + 2*CUDA_GET_ROWS_BLOCK_SIZE - 1) / (2*CUDA_GET_ROWS_BLOCK_SIZE);
    const dim3 block_nums(ne10, block_num_y, ne11*ne12);

    // strides in elements
    // const size_t s0 = nb0 / sizeof(dst_t);

@@ -127,8 +129,8 @@ static void get_rows_cuda_float(
        const size_t nb1, const size_t nb2, const size_t nb3,
        cudaStream_t stream) {
    const dim3 block_dims(CUDA_GET_ROWS_BLOCK_SIZE, 1, 1);
    const int block_num_x = (ne00 + CUDA_GET_ROWS_BLOCK_SIZE - 1) / CUDA_GET_ROWS_BLOCK_SIZE;
    const dim3 block_nums(block_num_x, ne10, ne11*ne12);
    const int block_num_y = (ne00 + CUDA_GET_ROWS_BLOCK_SIZE - 1) / CUDA_GET_ROWS_BLOCK_SIZE;
    const dim3 block_nums(ne10, block_num_y, ne11*ne12);

    // strides in elements
    // const size_t s0 = nb0 / sizeof(dst_t);
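For context on the swap: CUDA allows up to 2^31 - 1 blocks along gridDim.x but only 65535 along gridDim.y and gridDim.z, so the dimension that can grow without bound, the number of rows to gather (ne10), is the one that belongs on x. A small sketch written for this summary (not code from the patch) shows the resulting launch geometry; it mirrors the float variant, and the quantized variant simply halves the column block count because each thread handles two elements.

```cpp
// Sketch only: the grid layout the new get_rows launch uses.
// CUDA limits: gridDim.x up to 2^31 - 1 blocks, gridDim.y and gridDim.z only 65535.
#include <cstdio>

static void plan_get_rows_grid(int ne00, int ne10, int ne11, int ne12, int block_size) {
    // column blocks go on y (float variant; the quantized variant divides by 2*block_size)
    const int block_num_y = (ne00 + block_size - 1) / block_size;
    // rows to gather (ne10, potentially huge) go on x; batch planes go on z
    std::printf("grid = (%d, %d, %d), block = (%d, 1, 1)\n",
                ne10, block_num_y, ne11 * ne12, block_size);
}

int main() {
    plan_get_rows_grid(/*ne00=*/4096, /*ne10=*/1000000, /*ne11=*/1, /*ne12=*/1, /*block_size=*/256);
}
```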
@@ -151,6 +151,12 @@ struct rpc_msg_buffer_clear_req {
    uint8_t value;
};

struct rpc_msg_set_tensor_hash_req {
    rpc_tensor tensor;
    uint64_t offset;
    uint64_t hash;
};

struct rpc_msg_set_tensor_hash_rsp {
    uint8_t result;
};
@@ -548,15 +554,12 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm
    ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
    rpc_tensor rpc_tensor = serialize_tensor(tensor);
    if (size > HASH_THRESHOLD) {
        // input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes)
        size_t input_size = sizeof(rpc_tensor) + sizeof(uint64_t) + sizeof(uint64_t);
        std::vector<uint8_t> input(input_size, 0);
        uint64_t hash = fnv_hash((const uint8_t*)data, size);
        memcpy(input.data(), &rpc_tensor, sizeof(rpc_tensor));
        memcpy(input.data() + sizeof(rpc_tensor), &offset, sizeof(offset));
        memcpy(input.data() + sizeof(rpc_tensor) + sizeof(offset), &hash, sizeof(hash));
        rpc_msg_set_tensor_hash_req request;
        request.tensor = rpc_tensor;
        request.offset = offset;
        request.hash = fnv_hash((const uint8_t*)data, size);
        rpc_msg_set_tensor_hash_rsp response;
        bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, input.data(), input.size(), &response, sizeof(response));
        bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, &request, sizeof(request), &response, sizeof(response));
        GGML_ASSERT(status);
        if (response.result) {
            // the server has the same data, no need to send it
@@ -864,7 +867,7 @@ public:
    bool free_buffer(const rpc_msg_free_buffer_req & request);
    bool buffer_clear(const rpc_msg_buffer_clear_req & request);
    bool set_tensor(const std::vector<uint8_t> & input);
    bool set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set_tensor_hash_rsp & response);
    bool set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rpc_msg_set_tensor_hash_rsp & response);
    bool get_tensor(const rpc_msg_get_tensor_req & request, std::vector<uint8_t> & response);
    bool copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_copy_tensor_rsp & response);
    bool graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph_compute_rsp & response);
@@ -1101,18 +1104,10 @@ bool rpc_server::get_cached_file(uint64_t hash, std::vector<uint8_t> & data) {
    return true;
}

bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set_tensor_hash_rsp & response)
bool rpc_server::set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rpc_msg_set_tensor_hash_rsp & response)
{
    // serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes) |
    if (input.size() != sizeof(rpc_tensor) + 16) {
        return false;
    }
    const rpc_tensor * in_tensor = (const rpc_tensor *)input.data();
    uint64_t offset;
    memcpy(&offset, input.data() + sizeof(rpc_tensor), sizeof(offset));
    const uint64_t * hash = (const uint64_t *)(input.data() + sizeof(rpc_tensor) + sizeof(offset));
    std::vector<uint8_t> cached_file;
    if (!get_cached_file(*hash, cached_file)) {
    if (!get_cached_file(request.hash, cached_file)) {
        response.result = 0;
        return true;
    }

@@ -1125,25 +1120,28 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
    ggml_context_ptr ctx_ptr { ggml_init(params) };
    GGML_ASSERT(ctx_ptr != nullptr);
    ggml_context * ctx = ctx_ptr.get();
    ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
    ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
    if (tensor == nullptr) {
        GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
        return false;
    }
    GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n", __func__, (void*)tensor->buffer, tensor->data, offset, size, *hash);
    GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n",
        __func__, (void*)tensor->buffer, tensor->data, request.offset, size, request.hash);

    // sanitize tensor->data
    {
        const size_t p0 = (size_t) ggml_backend_buffer_get_base(tensor->buffer);
        const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);

        if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
        if (request.tensor.data + request.offset < p0
            || request.tensor.data + request.offset >= p1
            || size > (p1 - request.tensor.data - request.offset)) {
            GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu, hash=0x%" PRIx64 ") out of buffer bounds [0x%zx, 0x%zx)\n",
                __func__, in_tensor->data, offset, size, *hash, p0, p1);
                __func__, request.tensor.data, request.offset, size, request.hash, p0, p1);
            return false;
        }
    }
    ggml_backend_tensor_set(tensor, cached_file.data(), offset, size);
    ggml_backend_tensor_set(tensor, cached_file.data(), request.offset, size);
    response.result = 1;
    return true;
}
@@ -1503,12 +1501,12 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
                break;
            }
            case RPC_CMD_SET_TENSOR_HASH: {
                std::vector<uint8_t> input;
                if (!recv_msg(sockfd, input)) {
                rpc_msg_set_tensor_hash_req request;
                if (!recv_msg(sockfd, &request, sizeof(request))) {
                    return;
                }
                rpc_msg_set_tensor_hash_rsp response;
                if (!server.set_tensor_hash(input, response)) {
                if (!server.set_tensor_hash(request, response)) {
                    return;
                }
                if (!send_msg(sockfd, &response, sizeof(response))) {
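For tensors above HASH_THRESHOLD, the client now sends only the fixed-size request struct (tensor descriptor, offset, and a 64-bit content hash) and the server replies whether its file cache already holds data with that hash, in which case the payload never crosses the wire. The fnv_hash helper itself is not shown in this excerpt; as a point of reference, here is a minimal sketch of the standard 64-bit FNV-1a hash such a helper typically implements (treat the constants and variant as an assumption, not a quotation of ggml-rpc):

```cpp
// Sketch of 64-bit FNV-1a; illustrative only, not the quoted fnv_hash implementation.
#include <cstdint>
#include <cstddef>
#include <cstdio>

static uint64_t fnv1a_64(const uint8_t * data, size_t len) {
    uint64_t h = 0xcbf29ce484222325ULL;   // FNV offset basis
    for (size_t i = 0; i < len; ++i) {
        h ^= data[i];
        h *= 0x100000001b3ULL;            // FNV prime
    }
    return h;
}

int main() {
    const uint8_t bytes[] = { 1, 2, 3, 4 };
    std::printf("%016llx\n", (unsigned long long) fnv1a_64(bytes, sizeof(bytes)));
}
```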
@@ -1632,7 +1632,7 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec
    const uint32_t warps = warptile[0] / warptile[10];

    const uint32_t load_bufs = (warptile[1] + warptile[2]) * (warptile[3] + bank_conflict_offset) * type_size;
    const uint32_t mmid_row_ids = mul_mat_id ? 3072 * sizeof(uint32_t) : 0;
    const uint32_t mmid_row_ids = mul_mat_id ? 4096 * sizeof(uint32_t) : 0;
    const uint32_t coopmat_stage = device->coopmat_support ? warptile[7] * warptile[8] / warps * sizeof(float) : 0;

    const uint32_t total_size = load_bufs + mmid_row_ids + coopmat_stage + lut_size;

@@ -5260,7 +5260,7 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&

    const uint64_t nei0 = ids->ne[0];
    const uint64_t nei1 = ids->ne[1];
    GGML_ASSERT(nei0 * nei1 <= 3072);
    GGML_ASSERT(nei0 * nei1 <= 4096);

    const uint32_t nbi1 = ids->nb[1];
    const uint32_t nbi2 = ids->nb[2];
@@ -103,7 +103,7 @@ shared FLOAT_TYPE buf_a[BM * SHMEM_STRIDE];
shared FLOAT_TYPE buf_b[BN * SHMEM_STRIDE];

#ifdef MUL_MAT_ID
shared u16vec2 row_ids[3072];
shared u16vec2 row_ids[4096];
#endif // MUL_MAT_ID

#define NUM_WARPS (BLOCK_SIZE / WARP)

@@ -92,7 +92,7 @@ layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
#ifdef MUL_MAT_ID
layout (binding = 3) readonly buffer IDS {int data_ids[];};

shared u16vec4 row_ids[3072];
shared u16vec4 row_ids[4096];

layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufB {
    B_TYPE b[];

@@ -101,7 +101,7 @@ shared FLOAT_TYPE_VEC2 buf_b_ds[BN];
#define LOAD_VEC_B 4

#ifdef MUL_MAT_ID
shared u16vec2 row_ids[3072];
shared u16vec2 row_ids[4096];
#endif // MUL_MAT_ID

#define NUM_WARPS (BLOCK_SIZE / WARP)
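For scale (worked from the declarations above, not stated in the patch): growing row_ids from 3072 to 4096 entries raises the shared-memory footprint of the u16vec2 arrays from 3072 * 4 bytes = 12 KiB to 4096 * 4 bytes = 16 KiB, and of the u16vec4 variant from 24 KiB to 32 KiB. The host-side check in ggml_vk_matmul_shmem_support budgets for this with the matching 4096 * sizeof(uint32_t) term, and the assertion in ggml_vk_mul_mat_id_q_f16 now admits up to nei0 * nei1 = 4096 expert row ids per dispatch.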
@@ -253,6 +253,9 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
    std::vector<ggml_backend_buffer_type_t> buft_extra;
    {
        auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
        if (!cpu_dev) {
            throw std::runtime_error(format("%s: no CPU backend found", __func__));
        }
        auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);

        auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)

@@ -291,6 +294,9 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
            LLAMA_LOG_WARN("%s: lora for '%s' cannot use buft '%s', fallback to CPU\n", __func__, model_tensor->name, ggml_backend_buft_name(buft));

            auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
            if (!cpu_dev) {
                throw std::runtime_error(format("%s: no CPU backend found", __func__));
            }
            buft = ggml_backend_dev_buffer_type(cpu_dev);

            break;
@@ -35,6 +35,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 },
    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
    { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 },
    { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN },
    { "phi3", LLM_CHAT_TEMPLATE_PHI_3 },
    { "phi4", LLM_CHAT_TEMPLATE_PHI_4 },
    { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 },
@@ -202,19 +203,20 @@ int32_t llm_chat_apply_template(
        if (add_ass) {
            ss << "<|im_start|>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7) {
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) {
        // Official mistral 'v7' template
        // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
        // https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken
        const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : "";
        for (auto message : chat) {
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << "[SYSTEM_PROMPT] " << content << "[/SYSTEM_PROMPT]";
                ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]";
            } else if (role == "user") {
                ss << "[INST] " << content << "[/INST]";
            }
            else {
                ss << " " << content << "</s>";
                ss << "[INST]" << trailing_space << content << "[/INST]";
            } else {
                ss << trailing_space << content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
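To make the trailing_space distinction concrete, this is what the branch above produces for a short exchange with system prompt S, user message U, and assistant reply A (derived from the quoted code, not from Mistral's documentation):

```
mistral-v7:        [SYSTEM_PROMPT] S[/SYSTEM_PROMPT][INST] U[/INST] A</s>
mistral-v7-tekken: [SYSTEM_PROMPT]S[/SYSTEM_PROMPT][INST]U[/INST]A</s>
```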
@@ -14,6 +14,7 @@ enum llm_chat_template {
    LLM_CHAT_TEMPLATE_MISTRAL_V3,
    LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
    LLM_CHAT_TEMPLATE_MISTRAL_V7,
    LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN,
    LLM_CHAT_TEMPLATE_PHI_3,
    LLM_CHAT_TEMPLATE_PHI_4,
    LLM_CHAT_TEMPLATE_FALCON_3,
@@ -823,6 +823,10 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps
    mmaps_used.reserve(files.size());
    for (const auto & file : files) {
        auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
        if (!reg) {
            throw std::runtime_error(format("%s: no CPU backend found", __func__));
        }

        auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
        std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa_fn());
        mmaps_used.emplace_back(mapping->size(), 0);
@@ -299,6 +299,10 @@ static buft_list_t make_cpu_buft_list(const std::vector<ggml_backend_dev_t> & de
    // add extra buffer types, only if no GPU device is present
    // ref: https://github.com/ggml-org/llama.cpp/issues/12481#issuecomment-2743136094
    auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    if (cpu_dev == nullptr) {
        throw std::runtime_error(format("%s: no CPU backend found", __func__));
    }

    auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
    auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
        ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
@@ -1484,6 +1488,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
    }

    ggml_backend_dev_t cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    if (cpu_dev == nullptr) {
        throw std::runtime_error(format("%s: no CPU backend found", __func__));
    }
    const int i_gpu_start = std::max((int) hparams.n_layer - n_gpu_layers, (int) 0);
    const int act_gpu_layers = devices.empty() ? 0 : std::min(n_gpu_layers, (int)n_layer + 1);
    auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {

@@ -1672,6 +1679,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
            auto * buft_dev = ggml_backend_buft_get_device(buft);
            if (ml.use_mmap && buft_dev && buft == ggml_backend_dev_host_buffer_type(buft_dev)) {
                auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
                if (!cpu_dev) {
                    throw std::runtime_error("no CPU backend found");
                }
                buft = ggml_backend_dev_buffer_type(cpu_dev);
            }
@@ -4122,6 +4132,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
        if (!dev) {
            // FIXME: workaround for CPU backend buft having a NULL device
            dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
            if (!dev) {
                throw std::runtime_error(format("%s: no CPU backend found", __func__));
            }
        }
        ggml_backend_dev_props props;
        ggml_backend_dev_get_props(dev, &props);
@@ -13387,6 +13400,14 @@ const char * llama_model_chat_template(const llama_model * model, const char * n
        : LLM_KV(model->arch)(LLM_KV_TOKENIZER_CHAT_TEMPLATE);
    const auto & it = model->gguf_kv.find(key);
    if (it == model->gguf_kv.end()) {
        // one-off fix for very popular models (so we are not flooded with issues)
        // do not extend this list unless absolutely necessary
        // Mistral-Small-2503 does not have built-in chat template
        llama_vocab_pre_type pre_type = model->vocab.get_pre_type();
        if (pre_type == LLAMA_VOCAB_PRE_TYPE_TEKKEN && model->layers.size() == 40) {
            return "mistral-v7-tekken";
        }

        return nullptr;
    }
@@ -24,7 +24,8 @@ static void print_usage(int, char ** argv) {
    LOG("\n %s \\\n"
        " -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n"
        " [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
        " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]);
        " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...] \\\n"
        " [--parse-special]\n" , argv[0]);
    LOG("\n");
}
@@ -439,7 +440,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {
    auto tim1 = std::chrono::high_resolution_clock::now();
    LOG_INF("%s: tokenizing the input ..\n", __func__);

    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true, params.parse_special);

    auto tim2 = std::chrono::high_resolution_clock::now();
    LOG_INF("%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());
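For readers unfamiliar with the flag: passing parse_special = true asks the tokenizer to match special tokens that appear literally in the imatrix text (for example chat-control markers such as <|im_start|>) and map them to their special token ids, instead of tokenizing them as ordinary text. A minimal illustration of the calling side, written for this summary (ctx is assumed to be an initialized llama_context; both boolean arguments exist on the common_tokenize overload used above):

```cpp
// Illustration: the same text tokenized with and without special-token parsing.
std::vector<llama_token> plain   = common_tokenize(ctx, "<|im_start|>user\nhi", /*add_special=*/true, /*parse_special=*/false);
std::vector<llama_token> special = common_tokenize(ctx, "<|im_start|>user\nhi", /*add_special=*/true, /*parse_special=*/true);
// `special` contains the single <|im_start|> token id (when the model defines it);
// `plain` spells the marker out as ordinary text tokens.
```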
@@ -152,7 +152,12 @@ int main(int argc, char ** argv) {

    LOG_INF("%s: llama threadpool init, n_threads = %d\n", __func__, (int) params.cpuparams.n_threads);

    auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
    auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    if (!cpu_dev) {
        LOG_ERR("%s: no CPU backend found\n", __func__);
        return 1;
    }
    auto * reg = ggml_backend_dev_backend_reg(cpu_dev);
    auto * ggml_threadpool_new_fn = (decltype(ggml_threadpool_new) *) ggml_backend_reg_get_proc_address(reg, "ggml_threadpool_new");
    auto * ggml_threadpool_free_fn = (decltype(ggml_threadpool_free) *) ggml_backend_reg_get_proc_address(reg, "ggml_threadpool_free");
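Several of the hunks above repeat the same guard. As a compact restatement of the pattern, here is a hypothetical helper written for this summary; the patch itself inlines the check at each call site, throwing in library code and logging plus returning 1 in main():

```cpp
// Hypothetical helper (not in the patch) capturing the repeated guard:
// look up the CPU backend device and fail loudly instead of dereferencing a null device.
#include <stdexcept>
#include "ggml-backend.h"

static ggml_backend_dev_t require_cpu_dev() {
    ggml_backend_dev_t cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    if (cpu_dev == nullptr) {
        throw std::runtime_error("no CPU backend found");
    }
    return cpu_dev;
}
```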
@@ -46,7 +46,7 @@ llama-mtmd-cli -hf ggml-org/Qwen2.5-VL-32B-Instruct-GGUF
llama-mtmd-cli -hf ggml-org/Qwen2.5-VL-72B-Instruct-GGUF

# Mistral Small 3.1 24B (IQ2_M quantization)
llama-mtmd-cli -hf ggml-org/Mistral-Small-3.1-24B-Instruct-2503-GGUF --chat-template mistral-v7
llama-mtmd-cli -hf ggml-org/Mistral-Small-3.1-24B-Instruct-2503-GGUF
```

## How it works and what is `mmproj`?
@@ -352,9 +352,12 @@ struct clip_ctx {

    clip_ctx(clip_context_params & ctx_params) {
        backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
        backend = ctx_params.use_gpu
            ? ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr)
            : nullptr;
        if (!backend_cpu) {
            throw std::runtime_error("failed to initialize CPU backend");
        }
        backend = ctx_params.use_gpu
            ? ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr)
            : nullptr;

        if (backend) {
            LOG_INF("%s: CLIP using %s backend\n", __func__, ggml_backend_name(backend));
@@ -2185,9 +2188,10 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity) {

struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_params) {
    g_logger_state.verbosity_thold = ctx_params.verbosity;
    clip_ctx * ctx_clip = new clip_ctx(ctx_params);
    clip_ctx * ctx_clip = nullptr;

    try {
        ctx_clip = new clip_ctx(ctx_params);
        clip_model_loader loader(fname, *ctx_clip);
        loader.load_hparams();
        loader.load_tensors();
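The two clip hunks belong together: the constructor can now throw when the CPU backend fails to initialize, so clip_init defers the allocation into its existing try block instead of constructing before it. A sketch of the resulting control flow; the catch body lies outside the quoted context, so its exact behavior is assumed here:

```cpp
// Sketch of the pattern; the real catch-block contents are assumed, not quoted.
clip_ctx * ctx_clip = nullptr;
try {
    ctx_clip = new clip_ctx(ctx_params);   // may throw "failed to initialize CPU backend"
    // ... load hparams and tensors ...
} catch (const std::exception & e) {
    // presumably: log the error, release any partially constructed context,
    // and return nullptr to the caller instead of crashing
    delete ctx_clip;
    return nullptr;
}
```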
@@ -212,6 +212,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
    ggml_build_forward_expand(gf, flatten);

    ggml_backend_ptr backend { ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr) };
    GGML_ASSERT(backend != nullptr && "failed to initialize CPU backend");
    ggml_backend_graph_compute(backend.get(), gf);

    struct ggml_tensor* result = ggml_graph_node(gf, -1);
@@ -554,14 +554,19 @@ struct decode_embd_batch {
    llama_batch get_view(int offset, int n_tokens) {
        llama_pos * pos_ptr;
        pos_view.clear();
        pos_view.resize(n_tokens * n_pos_per_embd);
        pos_view.reserve(n_tokens * n_pos_per_embd);
        if (n_pos_per_embd > 1) {
            // mrope
            // for example, with layout of src: 1234...1234...1234...1234...
            // offset 2 will give us dst: 34...34...34...34...
            for (int i = 0; i < n_pos_per_embd; i++) {
                auto src = pos.begin() + i * batch.n_tokens + offset;
                pos_view.insert(pos_view.end(), src, src + n_tokens);
                // assume n_tokens is less than or equal to batch.n_tokens
                // batch.n_tokens is number of **total** tokens
                // n_tokens is number of viewed token
                size_t src_idx = i * batch.n_tokens + offset;
                pos_view.insert(pos_view.end(),
                    pos.data() + src_idx,
                    pos.data() + src_idx + n_tokens);
            }
            pos_ptr = pos_view.data();
        } else {
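The comment in the hunk describes slicing one window out of each of the n_pos_per_embd position sections. A standalone sketch of the same indexing, written for this summary; the real code appends into the member pos_view and hands out a llama_batch view rather than returning a fresh vector:

```cpp
// Sketch of the mrope position slicing described above.
#include <cstddef>
#include <vector>

std::vector<int> slice_mrope_positions(const std::vector<int> & pos,
                                       int n_pos_per_embd, int batch_n_tokens,
                                       int offset, int n_tokens) {
    std::vector<int> view;
    view.reserve((size_t) n_tokens * n_pos_per_embd);
    for (int i = 0; i < n_pos_per_embd; i++) {
        // each of the n_pos_per_embd sections is batch_n_tokens long; take the
        // [offset, offset + n_tokens) window out of every section
        const size_t src_idx = (size_t) i * batch_n_tokens + offset;
        view.insert(view.end(), pos.begin() + src_idx, pos.begin() + src_idx + n_tokens);
    }
    return view;
}
```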
@@ -237,15 +237,17 @@ static ggml_backend_t create_backend(const rpc_server_params & params) {
        backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
    }

    fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend));
    if (backend) {
        fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend));

    // set the number of threads
    ggml_backend_dev_t dev = ggml_backend_get_device(backend);
    ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
    if (reg) {
        auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
        if (ggml_backend_set_n_threads_fn) {
            ggml_backend_set_n_threads_fn(backend, params.n_threads);
        // set the number of threads
        ggml_backend_dev_t dev = ggml_backend_get_device(backend);
        ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
        if (reg) {
            auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
            if (ggml_backend_set_n_threads_fn) {
                ggml_backend_set_n_threads_fn(backend, params.n_threads);
            }
        }
    }
@@ -42,6 +42,8 @@ Examples:
  llama-run ollama://smollm:135m
  llama-run hf://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf
  llama-run huggingface://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf
  llama-run ms://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf
  llama-run modelscope://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf
  llama-run https://example.com/some-file1.gguf
  llama-run some-file2.gguf
  llama-run file://some-file3.gguf
@@ -267,7 +267,7 @@ class Opt {
            "Commands:\n"
            "  model\n"
            "    Model is a string with an optional prefix of \n"
            "    huggingface:// (hf://), ollama://, https:// or file://.\n"
            "    huggingface:// (hf://), modelscope:// (ms://), ollama://, https:// or file://.\n"
            "    If no protocol is specified and a file exists in the specified\n"
            "    path, file:// is assumed, otherwise if a file does not exist in\n"
            "    the specified path, ollama:// is assumed. Models that are being\n"

@@ -282,6 +282,9 @@ class Opt {
            "  llama-run hf://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf\n"
            "  llama-run "
            "huggingface://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf\n"
            "  llama-run ms://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf\n"
            "  llama-run "
            "modelscope://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf\n"
            "  llama-run https://example.com/some-file1.gguf\n"
            "  llama-run some-file2.gguf\n"
            "  llama-run file://some-file3.gguf\n"
@@ -689,7 +692,7 @@ class LlamaData {
        return 0;
    }

    int huggingface_dl(std::string & model, const std::string & bn) {
    int dl_from_endpoint(std::string & model_endpoint, std::string & model, const std::string & bn) {
        // Find the second occurrence of '/' after protocol string
        size_t pos = model.find('/');
        pos = model.find('/', pos + 1);

@@ -697,8 +700,6 @@ class LlamaData {
        std::vector<std::string> headers = { "User-Agent: llama-cpp", "Accept: application/json" };
        std::string url;

        std::string model_endpoint = get_model_endpoint();

        if (pos == std::string::npos) {
            auto [model_name, manifest_url] = extract_model_and_tag(model, model_endpoint + "v2/");
            hfr = model_name;

@@ -720,6 +721,16 @@ class LlamaData {
        return download(url, bn, true, headers);
    }

    int modelscope_dl(std::string & model, const std::string & bn) {
        std::string model_endpoint = "https://modelscope.cn/models/";
        return dl_from_endpoint(model_endpoint, model, bn);
    }

    int huggingface_dl(std::string & model, const std::string & bn) {
        std::string model_endpoint = get_model_endpoint();
        return dl_from_endpoint(model_endpoint, model, bn);
    }

    int ollama_dl(std::string & model, const std::string & bn) {
        const std::vector<std::string> headers = { "Accept: application/vnd.docker.distribution.manifest.v2+json" };
        if (model.find('/') == std::string::npos) {

@@ -837,6 +848,9 @@ class LlamaData {
            rm_until_substring(model_, "hf.co/");
            rm_until_substring(model_, "://");
            ret = huggingface_dl(model_, bn);
        } else if (string_starts_with(model_, "ms://") || string_starts_with(model_, "modelscope://")) {
            rm_until_substring(model_, "://");
            ret = modelscope_dl(model_, bn);
        } else if ((string_starts_with(model_, "https://") || string_starts_with(model_, "http://")) &&
                   !string_starts_with(model_, "https://ollama.com/library/")) {
            ret = download(model_, bn, true);
Binary file not shown.
@@ -37,19 +37,26 @@ export function useChatExtraContext(): ChatExtraContextApi {
          break;
        }

        if (mimeType.startsWith('image/') && mimeType !== 'image/svg+xml') {
          if (!serverProps?.has_multimodal) {
        if (mimeType.startsWith('image/')) {
          if (!serverProps?.modalities?.vision) {
            toast.error('Multimodal is not supported by this server or model.');
            break;
          }
          const reader = new FileReader();
          reader.onload = (event) => {
          reader.onload = async (event) => {
            if (event.target?.result) {
              let base64Url = event.target.result as string;

              if (mimeType === 'image/svg+xml') {
                // Convert SVG to PNG
                base64Url = await svgBase64UrlToPngDataURL(base64Url);
              }

              addItems([
                {
                  type: 'imageFile',
                  name: file.name,
                  base64Url: event.target.result as string,
                  base64Url,
                },
              ]);
            }
@@ -172,3 +179,56 @@ export function isLikelyNotBinary(str: string): boolean {
  const ratio = suspiciousCharCount / sampleLength;
  return ratio <= options.suspiciousCharThresholdRatio;
}

// WARN: vibe code below
// Converts a Base64URL encoded SVG string to a PNG Data URL using browser Canvas API.
function svgBase64UrlToPngDataURL(base64UrlSvg: string): Promise<string> {
  const backgroundColor = 'white'; // Default background color for PNG

  return new Promise((resolve, reject) => {
    try {
      const img = new Image();

      img.onload = () => {
        const canvas = document.createElement('canvas');
        const ctx = canvas.getContext('2d');

        if (!ctx) {
          reject(new Error('Failed to get 2D canvas context.'));
          return;
        }

        // Use provided dimensions or SVG's natural dimensions, with fallbacks
        // Fallbacks (e.g., 300x300) are for SVGs without explicit width/height
        // or when naturalWidth/Height might be 0 before full processing.
        const targetWidth = img.naturalWidth || 300;
        const targetHeight = img.naturalHeight || 300;

        canvas.width = targetWidth;
        canvas.height = targetHeight;

        if (backgroundColor) {
          ctx.fillStyle = backgroundColor;
          ctx.fillRect(0, 0, canvas.width, canvas.height);
        }

        ctx.drawImage(img, 0, 0, targetWidth, targetHeight);
        resolve(canvas.toDataURL('image/png'));
      };

      img.onerror = () => {
        reject(
          new Error('Failed to load SVG image. Ensure the SVG data is valid.')
        );
      };

      // Load SVG string into an Image element
      img.src = base64UrlSvg;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const errorMessage = `Error converting SVG to PNG: ${message}`;
      toast.error(errorMessage);
      reject(new Error(errorMessage));
    }
  });
}
@@ -118,6 +118,8 @@ export interface LlamaCppServerProps {
  build_info: string;
  model_path: string;
  n_ctx: number;
  has_multimodal: boolean;
  modalities?: {
    vision: boolean;
  };
  // TODO: support params
}