ggml : extend im2col f16 (ggml/1434)

* examples/yolo: fix load_model memory leak

* fix/issue-1433: ggml_compute_forward_im2col_f16 assert error when src1 is F16

* fix/issue-1433
This commit is contained in:
David366AI
2026-03-15 15:50:56 -04:00
committed by Georgi Gerganov
parent dddca026bf
commit f6da02c3f2

View File

@@ -6205,7 +6205,7 @@ static void ggml_compute_forward_im2col_f16(
const ggml_tensor * src1 = dst->src[1];
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT(src1->type == GGML_TYPE_F16 || src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F16);
GGML_TENSOR_BINARY_OP_LOCALS;
@@ -6236,7 +6236,7 @@ static void ggml_compute_forward_im2col_f16(
int ofs1 = is_2D ? nb12 : nb11;
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb10 == sizeof(float));
GGML_ASSERT(nb10 == ggml_type_size(src1->type));
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
{
@@ -6249,7 +6249,12 @@ static void ggml_compute_forward_im2col_f16(
// micro kernel
ggml_fp16_t * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
const float * const src_data = (float *)((char *) src1->data + in*ofs0 + iic*ofs1); // [IH, IW]
const float * const src_data_f32 = src1->type == GGML_TYPE_F32
? (const float *)((const char *) src1->data + in*ofs0 + iic*ofs1)
: nullptr; // [IH, IW]
const ggml_fp16_t * const src_data_f16 = src1->type == GGML_TYPE_F16
? (const ggml_fp16_t *)((const char *) src1->data + in*ofs0 + iic*ofs1)
: nullptr; // [IH, IW]
for (int64_t ikh = 0; ikh < KH; ikh++) { // 1
for (int64_t ikw = 0; ikw < KW; ikw++) {
@@ -6259,7 +6264,11 @@ static void ggml_compute_forward_im2col_f16(
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
dst_data[iic*(KH*KW) + ikh*KW + ikw] = 0;
} else {
dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_CPU_FP32_TO_FP16(src_data[iih*IW + iiw]);
if (src_data_f32 != nullptr) {
dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_CPU_FP32_TO_FP16(src_data_f32[iih*IW + iiw]);
} else {
dst_data[iic*(KH*KW) + ikh*KW + ikw] = src_data_f16[iih*IW + iiw];
}
}
}
}