fix: bugs and code style

This commit is contained in:
Sofia Rodrigues
2026-03-03 12:34:12 -03:00
parent 203d5362d4
commit 3d039f8dba
5 changed files with 160 additions and 68 deletions

View File

@@ -112,11 +112,9 @@ namespace UserInfo
Builds a `UserInfo` value from raw strings by applying userinfo percent-encoding.
-/
@[inline]
def ofStrings (username : String) (password : Option String := none) : UserInfo :=
{
username := EncodedUserInfo.encode username
password := EncodedUserInfo.encode <$> password
}
def ofStrings (username : String) (password : Option String := none) : UserInfo where
username := EncodedUserInfo.encode username
password := EncodedUserInfo.encode <$> password
/--
Returns the decoded username, or `none` if decoding fails UTF-8 validation.
@@ -227,11 +225,27 @@ instance : ToString Host where
| .ipv6 addr => s!"[{toString addr}]"
/--
TCP port number.
Reference: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.3
Authority port representation, preserving the distinction between:
* no port separator (`example.com`)
* empty port (`example.com:`)
* numeric port (`example.com:443`)
-/
abbrev Port := UInt16
inductive Port where
/--
No `:` port separator is present (for example, `example.com`).
-/
| omitted
/--
A `:` port separator is present with no digits after it (for example, `example.com:`).
-/
| empty
/--
A numeric port value is present (for example, `example.com:443`).
-/
| value (port : UInt16)
deriving Inhabited, Repr, DecidableEq
/--
The authority component of a URI, identifying the network location of the resource.
@@ -250,9 +264,10 @@ structure Authority where
host : Host
/--
Optional port number for connecting to the host.
Port component, preserving whether it is omitted (`example.com`),
explicitly empty (`example.com:`), or numeric (`example.com:443`).
-/
port : Option Port := none
port : Port := .omitted
deriving Inhabited, Repr
instance : ToString Authority where
@@ -263,8 +278,9 @@ instance : ToString Authority where
| some name, none => s!"{name}@"
let hostPart := toString auth.host
let portPart := match auth.port with
| none => ""
| some p => s!":{p}"
| .omitted => ""
| .empty => ":"
| .value p => s!":{p}"
s!"{userPart}{hostPart}{portPart}"
/--
@@ -606,7 +622,7 @@ structure Builder where
/--
The port number.
-/
port : Option URI.Port := none
port : URI.Port := .omitted
/--
Path segments (will be encoded when building).
@@ -692,8 +708,8 @@ def setHostIPv6 (b : Builder) (addr : Net.IPv6Addr) : Builder :=
/--
Sets the port number.
-/
def setPort (b : Builder) (port : Port) : Builder :=
{ b with port := some port }
def setPort (b : Builder) (port : UInt16) : Builder :=
{ b with port := .value port }
/--
Replaces all path segments. Segments will be automatically percent-encoded when building.

View File

@@ -52,10 +52,10 @@ Checks if all characters in a `ByteArray` are allowed in an encoded URI componen
that only verifies the character set, not full encoding validity.
-/
@[inline]
abbrev isAllowedEncodedChars (rule : UInt8 Bool) (s : ByteArray) : Prop :=
abbrev IsAllowedEncodedChars (rule : UInt8 Bool) (s : ByteArray) : Prop :=
s.data.all (isEncodedChar rule)
instance : Decidable (isAllowedEncodedChars r s) :=
instance : Decidable (IsAllowedEncodedChars r s) :=
inferInstanceAs (Decidable (s.data.all (isEncodedChar r) = true))
/--
@@ -63,10 +63,10 @@ Checks if all characters in a `ByteArray` are allowed in an encoded query parame
alternative encoding for space (application/x-www-form-urlencoded).
-/
@[inline]
abbrev isAllowedEncodedQueryChars (rule : UInt8 Bool) (s : ByteArray) : Prop :=
abbrev IsAllowedEncodedQueryChars (rule : UInt8 Bool) (s : ByteArray) : Prop :=
s.data.all (isEncodedQueryChar rule)
instance : Decidable (isAllowedEncodedQueryChars r s) :=
instance : Decidable (IsAllowedEncodedQueryChars r s) :=
inferInstanceAs (Decidable (s.data.all (isEncodedQueryChar r) = true))
/--
@@ -118,13 +118,13 @@ def hexDigitToUInt8? (c : UInt8) : Option UInt8 :=
else
none
private theorem isAllowedEncodedChars.push {bs : ByteArray} (h : isAllowedEncodedChars r bs) (h₁ : isEncodedChar r c) :
isAllowedEncodedChars r (bs.push c) := by
simpa [isAllowedEncodedChars, ByteArray.push, Array.all_push, And.intro h h₁]
private theorem IsAllowedEncodedChars.push {bs : ByteArray} (h : IsAllowedEncodedChars r bs) (h₁ : isEncodedChar r c) :
IsAllowedEncodedChars r (bs.push c) := by
simpa [IsAllowedEncodedChars, ByteArray.push, Array.all_push, And.intro h h₁]
private theorem isAllowedEncodedQueryChars.push {bs : ByteArray} (h : isAllowedEncodedQueryChars r bs) (h₁ : isEncodedQueryChar r c) :
isAllowedEncodedQueryChars r (bs.push c) := by
simpa [isAllowedEncodedQueryChars, ByteArray.push, Array.all_push, And.intro h h₁]
private theorem IsAllowedEncodedQueryChars.push {bs : ByteArray} (h : IsAllowedEncodedQueryChars r bs) (h₁ : isEncodedQueryChar r c) :
IsAllowedEncodedQueryChars r (bs.push c) := by
simpa [IsAllowedEncodedQueryChars, ByteArray.push, Array.all_push, And.intro h h₁]
private theorem isEncodedChar_isAscii (c : UInt8) (h : isEncodedChar r c) : isAsciiByte c := by
simp [isEncodedChar, isAsciiByte] at *
@@ -218,7 +218,7 @@ private theorem ByteArray.toList_toByteArray (ba : ByteArray) :
simp [List.toByteArray_loop_eq, ByteArray.empty]
decide
theorem ascii_is_valid_utf8 (ba : ByteArray) (s : ba.data.all isAsciiByte) : ByteArray.IsValidUTF8 ba := by
theorem isValidUTF8_of_isAsciiByte (ba : ByteArray) (s : ba.data.all isAsciiByte) : ByteArray.IsValidUTF8 ba := by
refine ba.data.toList.map Char.ofUInt8, ?_
rw [List.utf8Encode]
simp only [List.flatMap_map]
@@ -248,7 +248,7 @@ structure EncodedString (r : UInt8 → Bool) where
/--
Proof that all characters in the byte array are valid encoded characters.
-/
valid : isAllowedEncodedChars r toByteArray
valid : IsAllowedEncodedChars r toByteArray
namespace EncodedString
@@ -266,7 +266,7 @@ Appends a single encoded character to an encoded string.
Requires that the character is not '%' to maintain the percent-encoding invariant.
-/
private def push (s : EncodedString r) (c : UInt8) (h : isEncodedChar r c) : EncodedString r :=
s.toByteArray.push c, isAllowedEncodedChars.push s.valid h
s.toByteArray.push c, IsAllowedEncodedChars.push s.valid h
/--
Converts a byte to its percent-encoded hexadecimal representation (%XX). For example, a space
@@ -288,7 +288,7 @@ private def byteToHex (b : UInt8) (s : EncodedString r) : EncodedString r :=
let h₀ := hexDigit_isHexDigit (@UInt8.and_lt_add_one b 0xF (by decide))
isHexDigit_isEncodedChar h₀
exact isAllowedEncodedChars.push (isAllowedEncodedChars.push (isAllowedEncodedChars.push s.valid h1) h2) h3
exact IsAllowedEncodedChars.push (IsAllowedEncodedChars.push (IsAllowedEncodedChars.push s.valid h1) h2) h3
ba, valid
/--
@@ -307,7 +307,7 @@ Attempts to create an `EncodedString` from a `ByteArray`. Returns `some` if the
valid encoded characters and all percent signs are followed by exactly two hex digits, `none` otherwise.
-/
def ofByteArray? (ba : ByteArray) : Option (EncodedString r) :=
if h : isAllowedEncodedChars r ba then
if h : IsAllowedEncodedChars r ba then
if isValidPercentEncoding ba then some ba, h else none
else none
@@ -336,11 +336,11 @@ def ofString! (s : String) : EncodedString r :=
Creates an `EncodedString` from a `ByteArray` with compile-time proofs.
Use this when you have proofs that the byte array is valid.
-/
def new (ba : ByteArray) (valid : isAllowedEncodedChars r ba) (_validEncoding : isValidPercentEncoding ba) : EncodedString r :=
def new (ba : ByteArray) (valid : IsAllowedEncodedChars r ba) (_validEncoding : isValidPercentEncoding ba) : EncodedString r :=
ba, valid
instance : ToString (EncodedString r) where
toString es := es.toByteArray, ascii_is_valid_utf8 es.toByteArray (all_of_all_of_imp es.valid (fun c h => by simp [isEncodedChar] at h; exact h.left))
toString es := es.toByteArray, isValidUTF8_of_isAsciiByte es.toByteArray (all_of_all_of_imp es.valid (fun c h => by simp [isEncodedChar] at h; exact h.left))
/--
Decodes an `EncodedString` back to a regular `String`. Converts percent-encoded sequences (e.g., "%20")
@@ -401,7 +401,7 @@ structure EncodedQueryString (r : UInt8 → Bool) where
/--
Proof that all characters in the byte array are valid encoded query characters.
-/
valid : isAllowedEncodedQueryChars r toByteArray
valid : IsAllowedEncodedQueryChars r toByteArray
namespace EncodedQueryString
@@ -418,14 +418,14 @@ instance : Inhabited (EncodedQueryString r) where
Appends a single encoded query character to an encoded query string.
-/
private def push (s : EncodedQueryString r) (c : UInt8) (h : isEncodedQueryChar r c) : EncodedQueryString r :=
s.toByteArray.push c, isAllowedEncodedQueryChars.push s.valid h
s.toByteArray.push c, IsAllowedEncodedQueryChars.push s.valid h
/--
Attempts to create an `EncodedQueryString` from a `ByteArray`. Returns `some` if the byte array contains
only valid encoded query characters and all percent signs are followed by exactly two hex digits, `none` otherwise.
-/
def ofByteArray? (ba : ByteArray) (r : UInt8 Bool := isQueryChar) : Option (EncodedQueryString r) :=
if h : isAllowedEncodedQueryChars r ba then
if h : IsAllowedEncodedQueryChars r ba then
if isValidPercentEncoding ba then some ba, h else none
else none
@@ -454,7 +454,7 @@ def ofString! (s : String) (r : UInt8 → Bool := isQueryChar) : EncodedQueryStr
Creates an `EncodedQueryString` from a `ByteArray` with compile-time proofs.
Use this when you have proofs that the byte array is valid.
-/
def new (ba : ByteArray) (valid : isAllowedEncodedQueryChars r ba) (_validEncoding : isValidPercentEncoding ba) : EncodedQueryString r :=
def new (ba : ByteArray) (valid : IsAllowedEncodedQueryChars r ba) (_validEncoding : isValidPercentEncoding ba) : EncodedQueryString r :=
ba, valid
/--
@@ -472,7 +472,7 @@ private def byteToHex (b : UInt8) (s : EncodedQueryString r) : EncodedQueryStrin
isHexDigit_isEncodedQueryChar (hexDigit_isHexDigit (BitVec.toNat_ushiftRight_lt b.toBitVec 4 (by decide)))
have h3 : isEncodedQueryChar r (hexDigit (b &&& 0xF)) :=
isHexDigit_isEncodedQueryChar (hexDigit_isHexDigit (@UInt8.and_lt_add_one b 0xF (by decide)))
exact isAllowedEncodedQueryChars.push (isAllowedEncodedQueryChars.push (isAllowedEncodedQueryChars.push s.valid h1) h2) h3
exact IsAllowedEncodedQueryChars.push (IsAllowedEncodedQueryChars.push (IsAllowedEncodedQueryChars.push s.valid h1) h2) h3
ba, valid
/--
@@ -492,7 +492,7 @@ def encode (s : String) (r : UInt8 → Bool := isQueryChar) : EncodedQueryString
Converts an `EncodedQueryString` to a `String`, given a proof that all characters satisfying `r` are ASCII.
-/
def toString (es : EncodedQueryString r) : String :=
es.toByteArray, ascii_is_valid_utf8 es.toByteArray (all_of_all_of_imp es.valid (fun c h => isEncodedQueryChar_isAscii c h))
es.toByteArray, isValidUTF8_of_isAsciiByte es.toByteArray (all_of_all_of_imp es.valid (fun c h => isEncodedQueryChar_isAscii c h))
/--
Decodes an `EncodedQueryString` back to a regular `String`. Converts percent-encoded sequences and '+'
@@ -649,7 +649,7 @@ end EncodedUserInfo
/--
A percent-encoded URI query parameter. Valid characters are `pchar / "/" / "?"` with '+' for spaces.
-/
abbrev EncodedQueryParam := EncodedQueryString isQueryChar
abbrev EncodedQueryParam := EncodedQueryString isQueryDataChar
namespace EncodedQueryParam
@@ -657,25 +657,25 @@ namespace EncodedQueryParam
Encodes a raw string into an encoded query parameter.
-/
def encode (s : String) : EncodedQueryParam :=
EncodedQueryString.encode (r := isQueryChar) s
EncodedQueryString.encode (r := isQueryDataChar) s
/--
Attempts to create an encoded query parameter from raw bytes.
-/
def ofByteArray? (ba : ByteArray) : Option EncodedQueryParam :=
EncodedQueryString.ofByteArray? (r := isQueryChar) ba
EncodedQueryString.ofByteArray? (r := isQueryDataChar) ba
/--
Creates an encoded query parameter from raw bytes, panicking on invalid encoding.
-/
def ofByteArray! (ba : ByteArray) : EncodedQueryParam :=
EncodedQueryString.ofByteArray! (r := isQueryChar) ba
EncodedQueryString.ofByteArray! (r := isQueryDataChar) ba
/--
Attempts to create an encoded query parameter from an encoded string.
-/
def fromString? (s : String) : Option EncodedQueryParam :=
EncodedQueryString.ofString? (r := isQueryChar) s
EncodedQueryString.ofString? (r := isQueryDataChar) s
/--
Decodes an encoded query parameter back to a UTF-8 string.

View File

@@ -65,10 +65,10 @@ private def parseScheme (config : URI.Config) : Parser URI.Scheme := do
else
fail "invalid scheme"
-- port = *DIGIT
-- port = 1*DIGIT
private def parsePortNumber : Parser UInt16 := do
let portBytes takeWhileUpTo isDigit 5
if portBytes.size = 0 then fail "empty port number"
let portBytes takeWhileUpTo1 isDigit 5
let portStr := String.fromUTF8! portBytes.toByteArray
let some portNum := String.toNat? portStr
@@ -161,18 +161,28 @@ private def parseHost (config : URI.Config) : Parser URI.Host := do
-- authority = [ userinfo "@" ] host [ ":" port ]
private def parseAuthority (config : URI.Config) : Parser URI.Authority := do
let userinfo tryOpt do
let userInfo tryOpt do
let ui parseUserInfo config
skipByte '@'.toUInt8
return ui
let host parseHost config
let port optional do
skipByte ':'.toUInt8
parsePortNumber
let port : URI.AuthorityPort
if peekIs (· == ':'.toUInt8) then
skipByte ':'.toUInt8
if ( peekWhen? Internal.Char.isDigit).isSome then
pure (.value ( parsePortNumber))
else
let next peek?
if next.isNone || next.any (fun c => c = '/'.toUInt8 c = '?'.toUInt8 c = '#'.toUInt8) then
pure .empty
else
fail "invalid port number"
else
pure .omitted
return { userInfo := userinfo, host := host, port := port }
return { userInfo, host, port }
-- segment = *pchar
private def parseSegment (config : URI.Config) : Parser ByteSlice := do
@@ -388,17 +398,27 @@ where
let host parseHost config
skipByteChar ':'
let port parsePortNumber
return .authorityForm { host, port := some port }
return .authorityForm { host, port := .value port }
/--
Parses an HTTP `Host` header value.
-/
public def parseHostHeader (config : URI.Config := {}) : Parser (URI.Host × Option UInt16) := do
public def parseHostHeader (config : URI.Config := {}) : Parser (URI.Host × URI.Port) := do
let host parseHost config
let port optional do
skipByte ':'.toUInt8
parsePortNumber
let port : URI.AuthorityPort
if peekIs (· == ':'.toUInt8) then
skipByte ':'.toUInt8
if ( peekWhen? Internal.Char.isDigit).isSome then
pure (.value ( parsePortNumber))
else
let next peek?
if next.isNone then
pure .empty
else
fail "invalid host header port"
else
pure .omitted
if ¬( isEof) then
fail "invalid host header"

View File

@@ -292,4 +292,15 @@ that provides it.
def isUserInfoChar (c : UInt8) : Bool :=
isUnreserved c || isSubDelims c || c = ':'.toUInt8
/--
Checks if a byte is a valid character in a URI query component,
excluding the typical key/value separators `&` and `=`.
Inspired by `query = *( pchar / "/" / "?" )` from RFC 3986,
but disallows `&` and `=` so they can be treated as structural separators.
-/
@[inline, expose]
def isQueryDataChar (c : UInt8) : Bool :=
isQueryChar c && c '&'.toUInt8 && c '='.toUInt8
end Std.Http.Internal.Char

View File

@@ -212,6 +212,40 @@ info: some " "
#eval parseCheck "https://user:pass@secure.example.com/private"
#eval parseCheck "/double//slash//path"
#eval parseCheck "http://user%40example:pass%3Aword@host.com"
#eval parseCheck "http://example.com:/"
#eval parseCheck "http://example.com:/?q=1"
-- `&` in a key must be percent-encoded so toRawString round-trips correctly.
#guard
let query := URI.Query.empty.insert "a&b" "1"
query.toRawString == "a%26b=1"
-- `=` in a key must be percent-encoded so re-parsing preserves the key.
#guard
let query := URI.Query.empty.insert "a=b" "1"
query.toRawString == "a%3Db=1"
-- `&` in a value must be percent-encoded.
#guard
let query := URI.Query.empty.insert "key" "a&b"
query.toRawString == "key=a%26b"
-- `=` in a value is technically safe (parser uses first `=`), but encoding it
-- is still correct and keeps representation unambiguous.
#guard
let query := URI.Query.empty.insert "key" "a=b"
query.toRawString == "key=a%3Db"
-- Round-trip: insert → toRawString → re-parse should preserve the parameter.
#guard
let original := URI.Query.empty.insert "a&b" "c=d"
let raw := original.toRawString
-- Parse via a synthetic origin-form request target
match (URI.Parser.parseRequestTarget <* Std.Internal.Parsec.eof).run
s!"/path?{raw}".toUTF8 with
| .ok result =>
(result.query.get "a&b" == some "c=d")
| .error _ => false
#guard
match (parseRequestTarget <* Std.Internal.Parsec.eof).run "http:80".toUTF8 with
@@ -291,7 +325,7 @@ info: Std.Http.RequestTarget.originForm { segments := #[], absolute := true } no
/--
info: Std.Http.RequestTarget.authorityForm
{ userInfo := none, host := Std.Http.URI.Host.name "example.com", port := some 8080 }
{ userInfo := none, host := Std.Http.URI.Host.name "example.com", port := Std.Http.URI.Port.value 8080 }
-/
#guard_msgs in
#eval show IO _ from do
@@ -299,7 +333,8 @@ info: Std.Http.RequestTarget.authorityForm
IO.println (repr result)
/--
info: Std.Http.RequestTarget.authorityForm { userInfo := none, host := Std.Http.URI.Host.ipv4 192.168.1.1, port := some 3000 }
info: Std.Http.RequestTarget.authorityForm
{ userInfo := none, host := Std.Http.URI.Host.ipv4 192.168.1.1, port := Std.Http.URI.Port.value 3000 }
-/
#guard_msgs in
#eval show IO _ from do
@@ -307,7 +342,8 @@ info: Std.Http.RequestTarget.authorityForm { userInfo := none, host := Std.Http.
IO.println (repr result)
/--
info: Std.Http.RequestTarget.authorityForm { userInfo := none, host := Std.Http.URI.Host.ipv6 ::1, port := some 8080 }
info: Std.Http.RequestTarget.authorityForm
{ userInfo := none, host := Std.Http.URI.Host.ipv6 ::1, port := Std.Http.URI.Port.value 8080 }
-/
#guard_msgs in
#eval show IO _ from do
@@ -317,7 +353,9 @@ info: Std.Http.RequestTarget.authorityForm { userInfo := none, host := Std.Http.
/--
info: Std.Http.RequestTarget.absoluteForm
{ scheme := "https",
authority := some { userInfo := none, host := Std.Http.URI.Host.name "example.com", port := some 8080 },
authority := some { userInfo := none,
host := Std.Http.URI.Host.name "example.com",
port := Std.Http.URI.Port.value 8080 },
path := { segments := #["ata"], absolute := true },
query := #[],
fragment := none }
@@ -331,7 +369,9 @@ info: Std.Http.RequestTarget.absoluteForm
/--
info: Std.Http.RequestTarget.absoluteForm
{ scheme := "http",
authority := some { userInfo := none, host := Std.Http.URI.Host.ipv6 2001:db8::1, port := some 8080 },
authority := some { userInfo := none,
host := Std.Http.URI.Host.ipv6 2001:db8::1,
port := Std.Http.URI.Port.value 8080 },
path := { segments := #["path"], absolute := true },
query := #[],
fragment := none }
@@ -347,7 +387,7 @@ info: Std.Http.RequestTarget.absoluteForm
{ scheme := "https",
authority := some { userInfo := some { username := "user%20b", password := some "pass" },
host := Std.Http.URI.Host.name "secure.example.com",
port := none },
port := Std.Http.URI.Port.omitted },
path := { segments := #["private"], absolute := true },
query := #[],
fragment := none }
@@ -847,7 +887,7 @@ info: Std.Http.RequestTarget.originForm { segments := #["caf%C3%A9"], absolute :
/--
info: Std.Http.RequestTarget.authorityForm
{ userInfo := none, host := Std.Http.URI.Host.name "proxy.example.com", port := some 3128 }
{ userInfo := none, host := Std.Http.URI.Host.name "proxy.example.com", port := Std.Http.URI.Port.value 3128 }
-/
#guard_msgs in
#eval show IO _ from do
@@ -855,7 +895,8 @@ info: Std.Http.RequestTarget.authorityForm
IO.println (repr result)
/--
info: Std.Http.RequestTarget.authorityForm { userInfo := none, host := Std.Http.URI.Host.ipv4 127.0.0.1, port := some 8080 }
info: Std.Http.RequestTarget.authorityForm
{ userInfo := none, host := Std.Http.URI.Host.ipv4 127.0.0.1, port := Std.Http.URI.Port.value 8080 }
-/
#guard_msgs in
#eval show IO _ from do
@@ -864,7 +905,7 @@ info: Std.Http.RequestTarget.authorityForm { userInfo := none, host := Std.Http.
/--
info: Std.Http.RequestTarget.authorityForm
{ userInfo := none, host := Std.Http.URI.Host.name "1example.com", port := some 8080 }
{ userInfo := none, host := Std.Http.URI.Host.name "1example.com", port := Std.Http.URI.Port.value 8080 }
-/
#guard_msgs in
#eval show IO _ from do
@@ -874,7 +915,9 @@ info: Std.Http.RequestTarget.authorityForm
/--
info: Std.Http.RequestTarget.absoluteForm
{ scheme := "http",
authority := some { userInfo := none, host := Std.Http.URI.Host.name "1example.com", port := none },
authority := some { userInfo := none,
host := Std.Http.URI.Host.name "1example.com",
port := Std.Http.URI.Port.omitted },
path := { segments := #["path"], absolute := true },
query := #[],
fragment := none }
@@ -888,7 +931,9 @@ info: Std.Http.RequestTarget.absoluteForm
/--
info: Std.Http.RequestTarget.absoluteForm
{ scheme := "http",
authority := some { userInfo := none, host := Std.Http.URI.Host.name "123abc.example.com", port := none },
authority := some { userInfo := none,
host := Std.Http.URI.Host.name "123abc.example.com",
port := Std.Http.URI.Port.omitted },
path := { segments := #["page"], absolute := true },
query := #[],
fragment := none }