Fix incorrect text content detection (#36364) (#36369)

Backport #36364 by wxiaoguang

Fix #36325

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
Giteabot
2026-01-15 00:45:38 +08:00
committed by GitHub
parent 812a3cffb3
commit ef6ab681f7
2 changed files with 46 additions and 0 deletions

View File

@@ -107,6 +107,17 @@ func detectFileTypeBox(data []byte) (brands []string, found bool) {
return brands, true
}
func isEmbeddedOpenType(data []byte) bool {
// https://www.w3.org/submissions/EOT
if len(data) < 80 {
return false
}
version := binary.LittleEndian.Uint32(data[8:]) // Actually this standard is abandoned (for IE6-IE11 only), there are only 3 versions defined
magic := binary.LittleEndian.Uint16(data[34:36]) // MagicNumber: 0x504C ("LP")
reserved := data[64:80] // Reserved 1-4 (each: unsigned long)
return (version == 0x00010000 || version == 0x00020001 || version == 0x00020002) && magic == 0x504C && bytes.Count(reserved, []byte{0}) == len(reserved)
}
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/plain if input is empty.
func DetectContentType(data []byte) SniffedType {
if len(data) == 0 {
@@ -119,6 +130,18 @@ func DetectContentType(data []byte) SniffedType {
data = data[:SniffContentSize]
}
const typeMsFontObject = "application/vnd.ms-fontobject"
if ct == typeMsFontObject {
// Stupid Golang blindly detects any content with 34th-35th bytes being "LP" as font.
// If it is not really for ".eot" content, we try to detect it again by hiding the "LP", see the test for more details.
if isEmbeddedOpenType(data) {
return SniffedType{typeMsFontObject}
}
data = slices.Clone(data)
data[34] = 'l'
ct = http.DetectContentType(data)
}
vars := globalVars()
// SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888
detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")

View File

@@ -6,6 +6,7 @@ package typesniffer
import (
"encoding/base64"
"encoding/hex"
"net/http"
"strings"
"testing"
@@ -154,3 +155,25 @@ func TestDetectContentTypeAvif(t *testing.T) {
st := DetectContentType(buf)
assert.Equal(t, MimeTypeImageAvif, st.contentType)
}
func TestDetectContentTypeIncorrectFont(t *testing.T) {
s := "Stupid Golang keep detecting 34th LP as font"
// They don't want to have any improvement to it: https://github.com/golang/go/issues/77172
golangDetected := http.DetectContentType([]byte(s))
assert.Equal(t, "application/vnd.ms-fontobject", golangDetected)
// We have to make our patch to make it work correctly
ourDetected := DetectContentType([]byte(s))
assert.Equal(t, "text/plain; charset=utf-8", ourDetected.contentType)
// For binary content, ensure it still detects as font. The content is from "opensans-regular.eot"
b := []byte{
0x3d, 0x30, 0x00, 0x00, 0x6b, 0x2f, 0x00, 0x00, 0x02, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00,
0x02, 0x0b, 0x06, 0x06, 0x03, 0x05, 0x04, 0x02, 0x02, 0x04, 0x01, 0x00, 0x90, 0x01, 0x00, 0x00,
0x04, 0x00, 0x4c, 0x50, 0xef, 0x02, 0x00, 0xe0, 0x5b, 0x20, 0x00, 0x40, 0x28, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x9f, 0x01, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x63, 0xf4, 0x17, 0x14,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x12, 0x00, 0x4f, 0x00, 0x70, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x20, 0x00, 0x53, 0x00,
}
assert.Equal(t, "application/vnd.ms-fontobject", http.DetectContentType(b))
assert.Equal(t, "application/vnd.ms-fontobject", DetectContentType(b).contentType)
}