diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 52140107fb..2babd7f9f0 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -2396,6 +2396,8 @@ class StableLMModel(TextModel): "LlavaForConditionalGeneration", "VoxtralForConditionalGeneration", "LlamaForCausalLMEagle3", + "Eagle3Speculator", + "Eagle3DraftModel", "LlamaModel") class LlamaModel(TextModel): model_arch = gguf.MODEL_ARCH.LLAMA @@ -2445,6 +2447,11 @@ class LlamaModel(TextModel): logger.info(f"EAGLE3: target_hidden_size = {target_hidden_size} (from target model config)") self.gguf_writer.add_uint32(f"{self.gguf_writer.arch}.target_hidden_size", target_hidden_size) + # Eagle3Speculator norm_before_residual specific handling + norm_before_residual = eagle3_raw_config.get("norm_before_residual", False) + logger.info(f"EAGLE3: norm_before_residual = {norm_before_residual} (from EAGLE3 config)") + self.gguf_writer.add_bool(f"{self.gguf_writer.arch}.norm_before_residual", norm_before_residual) + def set_vocab(self): # For EAGLE-3 models, use tokenizer from target model if provided if hasattr(self, 'is_eagle3') and self.is_eagle3: @@ -2528,15 +2535,23 @@ class LlamaModel(TextModel): def index_tensors(self, remote_hf_model_id: str | None = None) -> dict[str, Callable[[], Tensor]]: tensors = super().index_tensors(remote_hf_model_id) + + # Handle Eagle3Speculator nested config + if "transformer_layer_config" in self.hparams: + self.hparams = {**self.hparams, **self.hparams["transformer_layer_config"]} + # EAGLE-3 detection: check hparams directly (before self.is_eagle3 is set) if "draft_vocab_size" in self.hparams and self.hparams["num_hidden_layers"] == 1: - logger.info("EAGLE-3: Renaming midlayer.* to model.layers.0.*") + logger.info("EAGLE-3: Renaming midlayer.* or layers.0.* to model.layers.0.*") new_tensors = {} # EAGLE-3: rename midlayer.* to model.layers.0.* for compatibility with llama model for name, gen in tensors.items(): if name.startswith("midlayer."): new_name = 
"model.layers.0." + name[len("midlayer."):] new_tensors[new_name] = gen + elif name.startswith("layers.0."): # layers.0.* -> model.layers.0.* (Eagle3Speculator format) + new_name = "model." + name + new_tensors[new_name] = gen else: new_tensors[name] = gen return new_tensors diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index b1160ca26d..2ae5094619 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -149,6 +149,7 @@ class Keys: DENSE_FEAT_OUT_SIZE = "{arch}.{dense}_feat_out" EAGLE3_EXTRACT_LAYERS = "{arch}.extract_layers" EAGLE3_TARGET_HIDDEN_SIZE = "{arch}.target_hidden_size" + EAGLE3_NORM_BEFORE_RESIDUAL = "{arch}.norm_before_residual" class Attention: HEAD_COUNT = "{arch}.attention.head_count" diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp index 4caa5f77ae..8304c63615 100644 --- a/src/llama-arch.cpp +++ b/src/llama-arch.cpp @@ -248,8 +248,9 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" }, - { LLM_KV_EAGLE3_EXTRACT_LAYERS, "%s.extract_layers" }, - { LLM_KV_EAGLE3_TARGET_HIDDEN_SIZE, "%s.target_hidden_size" }, + { LLM_KV_EAGLE3_EXTRACT_LAYERS, "%s.extract_layers" }, + { LLM_KV_EAGLE3_TARGET_HIDDEN_SIZE, "%s.target_hidden_size" }, + { LLM_KV_EAGLE3_NORM_BEFORE_RESIDUAL, "%s.norm_before_residual" }, { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" }, // sentence-transformers dense modules feature dims diff --git a/src/llama-arch.h b/src/llama-arch.h index 3e731b5005..36cad138a8 100644 --- a/src/llama-arch.h +++ b/src/llama-arch.h @@ -292,6 +292,7 @@ enum llm_kv { LLM_KV_EAGLE3_EXTRACT_LAYERS, LLM_KV_EAGLE3_TARGET_HIDDEN_SIZE, + LLM_KV_EAGLE3_NORM_BEFORE_RESIDUAL, LLM_KV_SHORTCONV_L_CACHE, diff --git a/src/llama-hparams.h b/src/llama-hparams.h index 9272c728e3..f8ed7f364c 100644 --- a/src/llama-hparams.h +++ b/src/llama-hparams.h @@ -196,6 +196,9 @@ struct llama_hparams { // EAGLE3 draft model - target model hidden size uint32_t eagle3_target_hidden_size = 
0; + // EAGLE3 draft model - apply hidden_norm before storing residual + bool eagle3_norm_before_residual = false; + // needed by encoder-decoder models (e.g. T5, FLAN-T5) // ref: https://github.com/ggerganov/llama.cpp/pull/8141 llama_token dec_start_token_id = LLAMA_TOKEN_NULL; diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 287bfe7f14..4879376aef 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -2260,7 +2260,14 @@ void llama_model::load_hparams(llama_model_loader & ml) { ml.get_key(LLM_KV_EAGLE3_TARGET_HIDDEN_SIZE, hparams.eagle3_target_hidden_size); LLAMA_LOG_INFO("%s: EAGLE3 target_hidden_size = %u (draft n_embd = %u)\n", __func__, hparams.eagle3_target_hidden_size, hparams.n_embd); - + + // EAGLE3 norm_before_residual (optional, default false) + // compatible with Red Hat eagle3 speculator model + ml.get_key(LLM_KV_EAGLE3_NORM_BEFORE_RESIDUAL, hparams.eagle3_norm_before_residual, false); + if (hparams.eagle3_norm_before_residual) { + LLAMA_LOG_INFO("%s: EAGLE3 norm_before_residual = true\n", __func__); + } + type = LLM_TYPE_UNKNOWN; } break; case LLM_ARCH_COGVLM: diff --git a/src/models/eagle3.cpp b/src/models/eagle3.cpp index 629d89d327..4f9410b360 100644 --- a/src/models/eagle3.cpp +++ b/src/models/eagle3.cpp @@ -77,9 +77,6 @@ llm_build_eagle3_decode::llm_build_eagle3_decode(const llama_model & model, cons // Single decoder layer (il = 0) const int il = 0; { - // inpL is the concatenated input (normalized inp_embd + normalized inp_g) - ggml_tensor * inpSA = inpL; - // Apply input_layernorm to the token embeddings ggml_tensor * embd_norm = build_norm(inp_embd, model.layers[il].attn_norm, NULL, @@ -92,6 +89,12 @@ llm_build_eagle3_decode::llm_build_eagle3_decode(const llama_model & model, cons LLM_NORM_RMS, -1); cb(g_norm, "g_norm", il); + // norm_before_residual: determines what goes into the residual connection (compatible with Red Hat eagle3 speculator model) + // - false (default): use raw inp_g for residual + // - true: use
normalized g_norm for residual + // inpL is the concatenated input (normalized inp_embd + normalized inp_g) + ggml_tensor * inpSA = hparams.eagle3_norm_before_residual ? g_norm : inpL; + // Concatenate normalized inp_embd and normalized inp_g cur = ggml_concat(ctx0, embd_norm, g_norm, il); cb(cur, "concat_embd", il);