From 1c1a7b8492d83310e129ae1faa1e9167d4c46577 Mon Sep 17 00:00:00 2001
From: wxiaoguang
Date: Wed, 14 Jan 2026 23:35:52 +0800
Subject: [PATCH] Fix incorrect text content detection (#36364)

Fix #36325
---
 modules/typesniffer/typesniffer.go      | 23 +++++++++++++++++++++++
 modules/typesniffer/typesniffer_test.go | 23 +++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go
index 2e8d9c4a1e..0c4867d8f0 100644
--- a/modules/typesniffer/typesniffer.go
+++ b/modules/typesniffer/typesniffer.go
@@ -107,6 +107,17 @@ func detectFileTypeBox(data []byte) (brands []string, found bool) {
 	return brands, true
 }
 
+func isEmbeddedOpenType(data []byte) bool {
+	// https://www.w3.org/submissions/EOT
+	if len(data) < 80 {
+		return false
+	}
+	version := binary.LittleEndian.Uint32(data[8:])  // Actually this standard is abandoned (for IE6-IE11 only), there are only 3 versions defined
+	magic := binary.LittleEndian.Uint16(data[34:36]) // MagicNumber: 0x504C ("LP")
+	reserved := data[64:80]                          // Reserved 1-4 (each: unsigned long)
+	return (version == 0x00010000 || version == 0x00020001 || version == 0x00020002) && magic == 0x504C && bytes.Count(reserved, []byte{0}) == len(reserved)
+}
+
 // DetectContentType extends http.DetectContentType with more content types. Defaults to text/plain if input is empty.
 func DetectContentType(data []byte) SniffedType {
 	if len(data) == 0 {
@@ -119,6 +130,18 @@ func DetectContentType(data []byte) SniffedType {
 		data = data[:SniffContentSize]
 	}
 
+	const typeMsFontObject = "application/vnd.ms-fontobject"
+	if ct == typeMsFontObject {
+		// Stupid Golang blindly detects any content with 34th-35th bytes being "LP" as font.
+		// If it is not really for ".eot" content, we try to detect it again by hiding the "LP", see the test for more details.
+		if isEmbeddedOpenType(data) {
+			return SniffedType{typeMsFontObject}
+		}
+		data = slices.Clone(data)
+		data[34] = 'l'
+		ct = http.DetectContentType(data)
+	}
+
 	vars := globalVars()
 	// SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888
 	detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")
diff --git a/modules/typesniffer/typesniffer_test.go b/modules/typesniffer/typesniffer_test.go
index ad3f78afdc..17d67f41f7 100644
--- a/modules/typesniffer/typesniffer_test.go
+++ b/modules/typesniffer/typesniffer_test.go
@@ -6,6 +6,7 @@ package typesniffer
 import (
 	"encoding/base64"
 	"encoding/hex"
+	"net/http"
 	"strings"
 	"testing"
 
@@ -154,3 +155,25 @@ func TestDetectContentTypeAvif(t *testing.T) {
 	st := DetectContentType(buf)
 	assert.Equal(t, MimeTypeImageAvif, st.contentType)
 }
+
+func TestDetectContentTypeIncorrectFont(t *testing.T) {
+	s := "Stupid Golang keep detecting 34th LP as font"
+	// They don't want to have any improvement to it: https://github.com/golang/go/issues/77172
+	golangDetected := http.DetectContentType([]byte(s))
+	assert.Equal(t, "application/vnd.ms-fontobject", golangDetected)
+	// We have to make our patch to make it work correctly
+	ourDetected := DetectContentType([]byte(s))
+	assert.Equal(t, "text/plain; charset=utf-8", ourDetected.contentType)
+
+	// For binary content, ensure it still detects as font. The content is from "opensans-regular.eot"
+	b := []byte{
+		0x3d, 0x30, 0x00, 0x00, 0x6b, 0x2f, 0x00, 0x00, 0x02, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00,
+		0x02, 0x0b, 0x06, 0x06, 0x03, 0x05, 0x04, 0x02, 0x02, 0x04, 0x01, 0x00, 0x90, 0x01, 0x00, 0x00,
+		0x04, 0x00, 0x4c, 0x50, 0xef, 0x02, 0x00, 0xe0, 0x5b, 0x20, 0x00, 0x40, 0x28, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x9f, 0x01, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x63, 0xf4, 0x17, 0x14,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x12, 0x00, 0x4f, 0x00, 0x70, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x20, 0x00, 0x53, 0x00,
+	}
+	assert.Equal(t, "application/vnd.ms-fontobject", http.DetectContentType(b))
+	assert.Equal(t, "application/vnd.ms-fontobject", DetectContentType(b).contentType)
+}