diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index a9e17ee1fa..7ef9ffb27b 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2395,6 +2395,7 @@ class StableLMModel(TextModel):
     "VLlama3ForCausalLM",
     "LlavaForConditionalGeneration",
     "VoxtralForConditionalGeneration",
+    "LlamaForCausalLMEagle3",
     "LlamaModel")
 class LlamaModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LLAMA
@@ -2477,10 +2478,6 @@ class LlamaModel(TextModel):
             # Llama 3
             self._set_vocab_gpt2()
 
-        # Restore original dir_model for EAGLE-3
-        if hasattr(self, 'is_eagle3') and self.is_eagle3:
-            self.dir_model = original_dir_model
-
         # Apply to CodeLlama only (and ignore for Llama 3 with a vocab size of 128256)
         if self.hparams.get("vocab_size", 32000) == 32016:
             special_vocab = gguf.SpecialVocab(
@@ -2504,6 +2501,10 @@ class LlamaModel(TextModel):
         if self.hparams.get("vocab_size", 32000) == 49152:
             self.gguf_writer.add_add_bos_token(False)
 
+        # Restore original dir_model for EAGLE-3
+        if hasattr(self, 'is_eagle3') and self.is_eagle3:
+            self.dir_model = original_dir_model
+
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
diff --git a/src/models/qwen3.cpp b/src/models/qwen3.cpp
index a5cfffa531..c1f34624c0 100644
--- a/src/models/qwen3.cpp
+++ b/src/models/qwen3.cpp
@@ -21,6 +21,17 @@ llm_build_qwen3::llm_build_qwen3(const llama_model & model, const llm_graph_para
     for (int il = 0; il < n_layer; ++il) {
         ggml_tensor * inpSA = inpL;
 
+        // EAGLE3: Extract intermediate layer features from target model at layer INPUT
+        if (eagle3 && cparams.eagle3_extract_enabled && !eagle3->extract_layer_indices.empty()) {
+            static const char * eagle3_extract_names[] = {"eagle3_extract_0", "eagle3_extract_1", "eagle3_extract_2"};
+            for (size_t i = 0; i < eagle3->extract_layer_indices.size() && i < 3; ++i) {
+                if (eagle3->extract_layer_indices[i] == il) {
+                    cb(inpL, eagle3_extract_names[i], il);
+                    break;
+                }
+            }
+        }
+
         // norm
         cur = build_norm(inpL, model.layers[il].attn_norm, NULL,