From f9d3983de28abe7cce306083986fe8527c44b82b Mon Sep 17 00:00:00 2001 From: Gregorius Bima Kharisma Wicaksana <51526537+bimakw@users.noreply.github.com> Date: Tue, 6 Jan 2026 12:09:44 +0700 Subject: [PATCH] fix: generate IDs for HTML headings without id attribute (#36233) This PR fixes #27383 where HTML headings like `

<h1>Title</h1>

` in markdown files would have empty permalink anchors --------- Co-authored-by: wxiaoguang --- models/renderhelper/repo_file.go | 2 +- models/renderhelper/repo_wiki.go | 2 +- modules/markup/html.go | 2 +- modules/markup/html_node.go | 45 ++++++++++++- modules/markup/html_node_test.go | 104 +++++++++++++++++++++++++++++++ modules/markup/render.go | 9 +++ 6 files changed, 160 insertions(+), 4 deletions(-) create mode 100644 modules/markup/html_node_test.go diff --git a/models/renderhelper/repo_file.go b/models/renderhelper/repo_file.go index e0375ed280..f1df8e89e0 100644 --- a/models/renderhelper/repo_file.go +++ b/models/renderhelper/repo_file.go @@ -70,6 +70,6 @@ func NewRenderContextRepoFile(ctx context.Context, repo *repo_model.Repository, "repo": helper.opts.DeprecatedRepoName, }) } - rctx = rctx.WithHelper(helper) + rctx = rctx.WithHelper(helper).WithEnableHeadingIDGeneration(true) return rctx } diff --git a/models/renderhelper/repo_wiki.go b/models/renderhelper/repo_wiki.go index b75f1b9701..218b1e4a67 100644 --- a/models/renderhelper/repo_wiki.go +++ b/models/renderhelper/repo_wiki.go @@ -71,7 +71,7 @@ func NewRenderContextRepoWiki(ctx context.Context, repo *repo_model.Repository, "markupAllowShortIssuePattern": "true", }) } - rctx = rctx.WithHelper(helper) + rctx = rctx.WithHelper(helper).WithEnableHeadingIDGeneration(true) helper.ctx = rctx return rctx } diff --git a/modules/markup/html.go b/modules/markup/html.go index 51afd4be00..9676241b54 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -314,7 +314,7 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod return node.NextSibling } - processNodeAttrID(node) + processNodeAttrID(ctx, node) processFootnoteNode(ctx, node) // FIXME: the footnote processing should be done in the "footnote.go" renderer directly if isEmojiNode(node) { diff --git a/modules/markup/html_node.go b/modules/markup/html_node.go index 4eb78fdd2b..066ee9711d 100644 --- 
a/modules/markup/html_node.go +++ b/modules/markup/html_node.go @@ -6,6 +6,8 @@ package markup import ( "strings" + "code.gitea.io/gitea/modules/markup/common" + "golang.org/x/net/html" ) @@ -23,16 +25,57 @@ func isAnchorHrefFootnote(s string) bool { return strings.HasPrefix(s, "#fnref:user-content-") || strings.HasPrefix(s, "#fn:user-content-") } -func processNodeAttrID(node *html.Node) { +// isHeadingTag returns true if the node is a heading tag (h1-h6) +func isHeadingTag(node *html.Node) bool { + return node.Type == html.ElementNode && + len(node.Data) == 2 && + node.Data[0] == 'h' && + node.Data[1] >= '1' && node.Data[1] <= '6' +} + +// getNodeText extracts the text content from a node and its children +func getNodeText(node *html.Node) string { + var text strings.Builder + var extractText func(*html.Node) + extractText = func(n *html.Node) { + if n.Type == html.TextNode { + text.WriteString(n.Data) + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + extractText(c) + } + } + extractText(node) + return text.String() +} + +func processNodeAttrID(ctx *RenderContext, node *html.Node) { // Add user-content- to IDs and "#" links if they don't already have them, // and convert the link href to a relative link to the host root + hasID := false for idx, attr := range node.Attr { if attr.Key == "id" { + hasID = true if !isAnchorIDUserContent(attr.Val) { node.Attr[idx].Val = "user-content-" + attr.Val } } } + + // For heading tags (h1-h6) without an id attribute, generate one from the text content. + // This ensures HTML headings like

<h1>Title</h1>

get proper permalink anchors + // matching the behavior of Markdown headings. + // Only enabled for repository files and wiki pages via EnableHeadingIDGeneration option. + if !hasID && isHeadingTag(node) && ctx.RenderOptions.EnableHeadingIDGeneration { + text := getNodeText(node) + if text != "" { + // Use the same CleanValue function used by Markdown heading ID generation + cleanedID := string(common.CleanValue([]byte(text))) + if cleanedID != "" { + node.Attr = append(node.Attr, html.Attribute{Key: "id", Val: "user-content-" + cleanedID}) + } + } + } } func processFootnoteNode(ctx *RenderContext, node *html.Node) { diff --git a/modules/markup/html_node_test.go b/modules/markup/html_node_test.go new file mode 100644 index 0000000000..007e3c2a12 --- /dev/null +++ b/modules/markup/html_node_test.go @@ -0,0 +1,104 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markup + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestProcessNodeAttrID_HTMLHeadingWithoutID(t *testing.T) { + // Test that HTML headings without id get an auto-generated id from their text content + // when EnableHeadingIDGeneration is true (for repo files and wiki pages) + testCases := []struct { + name string + input string + expected string + }{ + { + name: "h1 without id", + input: `

Heading without ID

`, + expected: `

Heading without ID

`, + }, + { + name: "h2 without id", + input: `

Another Heading

`, + expected: `

Another Heading

`, + }, + { + name: "h3 without id", + input: `

Third Level

`, + expected: `

Third Level

`, + }, + { + name: "h1 with existing id should keep it", + input: `

Heading with ID

`, + expected: `

Heading with ID

`, + }, + { + name: "h1 with user-content prefix should not double prefix", + input: `

Already Prefixed

`, + expected: `

Already Prefixed

`, + }, + { + name: "heading with special characters", + input: `

What is Wine Staging?

`, + expected: `

What is Wine Staging?

`, + }, + { + name: "heading with nested elements", + input: `

Bold and Italic

`, + expected: `

Bold and Italic

`, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var result strings.Builder + ctx := NewTestRenderContext().WithEnableHeadingIDGeneration(true) + err := PostProcessDefault(ctx, strings.NewReader(tc.input), &result) + assert.NoError(t, err) + assert.Equal(t, tc.expected, strings.TrimSpace(result.String())) + }) + } +} + +func TestProcessNodeAttrID_SkipHeadingIDForComments(t *testing.T) { + // Test that HTML headings in comment-like contexts (issue comments) + // do NOT get auto-generated IDs to avoid duplicate IDs on pages with multiple documents. + // This is controlled by EnableHeadingIDGeneration which defaults to false. + testCases := []struct { + name string + input string + expected string + }{ + { + name: "h1 without id in comment context", + input: `

Heading without ID

`, + expected: `

Heading without ID

`, + }, + { + name: "h2 without id in comment context", + input: `

Another Heading

`, + expected: `

Another Heading

`, + }, + { + name: "h1 with existing id should still be prefixed", + input: `

Heading with ID

`, + expected: `

Heading with ID

`, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var result strings.Builder + // Default context without EnableHeadingIDGeneration (simulates comment rendering) + err := PostProcessDefault(NewTestRenderContext(), strings.NewReader(tc.input), &result) + assert.NoError(t, err) + assert.Equal(t, tc.expected, strings.TrimSpace(result.String())) + }) + } +} diff --git a/modules/markup/render.go b/modules/markup/render.go index c645749065..12f002b0c6 100644 --- a/modules/markup/render.go +++ b/modules/markup/render.go @@ -54,6 +54,10 @@ type RenderOptions struct { // used by external render. the router "/org/repo/render/..." will output the rendered content in a standalone page InStandalonePage bool + + // EnableHeadingIDGeneration controls whether to auto-generate IDs for HTML headings without id attribute. + // This should be enabled for repository files and wiki pages, but disabled for comments to avoid duplicate IDs. + EnableHeadingIDGeneration bool } // RenderContext represents a render context @@ -112,6 +116,11 @@ func (ctx *RenderContext) WithInStandalonePage(v bool) *RenderContext { return ctx } +func (ctx *RenderContext) WithEnableHeadingIDGeneration(v bool) *RenderContext { + ctx.RenderOptions.EnableHeadingIDGeneration = v + return ctx +} + func (ctx *RenderContext) WithUseAbsoluteLink(v bool) *RenderContext { ctx.RenderOptions.UseAbsoluteLink = v return ctx