Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2026-05-10 19:14:07 +00:00)
convert : handle naive-quantized models
@@ -412,7 +412,7 @@ class ModelBase:
                     raise NotImplementedError("Can't handle multiple config groups for compressed-tensors yet")
                 weight_config = tuple(groups.values())[0]["weights"]
 
-                if quant_format == "float-quantized" or quant_format == "int-quantized":
+                if quant_format == "float-quantized" or quant_format == "int-quantized" or quant_format == "naive-quantized":
                     block_size = weight_config.get("block_structure", None)
                     strategy = weight_config.get("strategy")
                     assert strategy == "channel" or strategy == "block"
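For context, this hunk sits in the converter's handling of compressed-tensors quantization configs: the config declares a format (previously only "float-quantized" and "int-quantized" were accepted, now also "naive-quantized") and per-group weight settings whose strategy must be "channel" or "block". Below is a minimal sketch of that gating logic, under the assumption that the format lives at the top level of quantization_config; the helper names (load_quant_config, can_dequantize) are hypothetical and not part of convert_hf_to_gguf.py.

```python
import json
from pathlib import Path

# Config-group formats the converter can dequantize; this commit adds
# "naive-quantized" alongside the two formats accepted before.
SUPPORTED_FORMATS = {"float-quantized", "int-quantized", "naive-quantized"}


def load_quant_config(model_dir: Path) -> dict:
    # Hypothetical helper: read quantization_config from the model's config.json.
    with open(model_dir / "config.json", encoding="utf-8") as f:
        return json.load(f).get("quantization_config", {})


def can_dequantize(quant_config: dict) -> bool:
    # Assumed layout: a top-level "format" plus per-group settings under
    # "config_groups", mirroring the fields referenced in the diff above.
    if quant_config.get("format") not in SUPPORTED_FORMATS:
        return False

    groups = quant_config.get("config_groups", {})
    if len(groups) != 1:
        # Mirrors the NotImplementedError above: only a single group is handled.
        return False

    weight_config = tuple(groups.values())[0]["weights"]
    # Only channel-wise or block-wise weight quantization is supported.
    return weight_config.get("strategy") in ("channel", "block")
```

With the relaxed check, compressed-tensors checkpoints exported in the naive-quantized format take the same dequantization path as float- and int-quantized ones.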