mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-03-17 16:44:07 +00:00
ggml : extend im2col f16 (ggml/1434)
* examples/yolo: fix load_model memory leak
* fix/issue-1433 ggml_compute_forward_im2col_f16 assert error
* fix/issue-1433
This commit is contained in:
committed by
Georgi Gerganov
parent
dddca026bf
commit
f6da02c3f2
@@ -6205,7 +6205,7 @@ static void ggml_compute_forward_im2col_f16(
|
||||
const ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F16 || src1->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT( dst->type == GGML_TYPE_F16);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
@@ -6236,7 +6236,7 @@ static void ggml_compute_forward_im2col_f16(
|
||||
int ofs1 = is_2D ? nb12 : nb11;
|
||||
|
||||
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
||||
GGML_ASSERT(nb10 == sizeof(float));
|
||||
GGML_ASSERT(nb10 == ggml_type_size(src1->type));
|
||||
|
||||
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
|
||||
{
|
||||
@@ -6249,7 +6249,12 @@ static void ggml_compute_forward_im2col_f16(
|
||||
|
||||
// micro kernel
|
||||
ggml_fp16_t * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
|
||||
const float * const src_data = (float *)((char *) src1->data + in*ofs0 + iic*ofs1); // [IH, IW]
|
||||
const float * const src_data_f32 = src1->type == GGML_TYPE_F32
|
||||
? (const float *)((const char *) src1->data + in*ofs0 + iic*ofs1)
|
||||
: nullptr; // [IH, IW]
|
||||
const ggml_fp16_t * const src_data_f16 = src1->type == GGML_TYPE_F16
|
||||
? (const ggml_fp16_t *)((const char *) src1->data + in*ofs0 + iic*ofs1)
|
||||
: nullptr; // [IH, IW]
|
||||
|
||||
for (int64_t ikh = 0; ikh < KH; ikh++) { // 1
|
||||
for (int64_t ikw = 0; ikw < KW; ikw++) {
|
||||
@@ -6259,7 +6264,11 @@ static void ggml_compute_forward_im2col_f16(
|
||||
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
|
||||
dst_data[iic*(KH*KW) + ikh*KW + ikw] = 0;
|
||||
} else {
|
||||
dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_CPU_FP32_TO_FP16(src_data[iih*IW + iiw]);
|
||||
if (src_data_f32 != nullptr) {
|
||||
dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_CPU_FP32_TO_FP16(src_data_f32[iih*IW + iiw]);
|
||||
} else {
|
||||
dst_data[iic*(KH*KW) + ikh*KW + ikw] = src_data_f16[iih*IW + iiw];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user