fix: remove char testBit

2026-03-17 18:34:06 +00:00 · 2026-03-03 11:57:20 -03:00
parent 083fec29c8
commit 11d3860c69
1 changed files with 142 additions and 100 deletions
--- a/src/Std/Internal/Http/Internal/Char.lean
+++ b/src/Std/Internal/Http/Internal/Char.lean
@@ -26,16 +26,44 @@ set_option linter.all true
 /--
 Checks if a character is ASCII (Unicode code point < 128).
 -/
-@[expose]
-def isAscii (ch : Char) : Bool :=
-  ch.toNat < 128
+@[inline, expose]
+def isAscii (c : Char) : Bool :=
+  c.toNat < 128
+
+/--
+Checks if a character is a decimal digit (0-9).
+-/
+@[inline, expose]
+def isDigitChar (c : Char) : Bool :=
+  c ≥ '0' ∧ c ≤ '9'
+
+/--
+Checks if a character is an alphabetic character (a-z or A-Z).
+-/
+@[inline, expose]
+def isAlphaChar (c : Char) : Bool :=
+  (c ≥ 'A' ∧ c ≤ 'Z') ∨ (c ≥ 'a' ∧ c ≤ 'z')

 /--
 Checks if a byte represents an ASCII character (value < 128).
 -/
-@[expose]
-def isAsciiByte (ch : Char) : Bool :=
-  isAscii ch
+@[inline, expose]
+def isAsciiByte (c : UInt8) : Bool :=
+  c < 128
+
+/--
+Checks if a byte is a decimal digit (0-9).
+-/
+@[inline, expose]
+def isDigit (c : UInt8) : Bool :=
+  c >= '0'.toUInt8 && c <= '9'.toUInt8
+
+/--
+Checks if a byte is an alphabetic character (a-z or A-Z).
+-/
+@[inline, expose]
+def isAlpha (c : UInt8) : Bool :=
+  (c >= 'A'.toUInt8 && c <= 'Z'.toUInt8) || (c >= 'a'.toUInt8 && c <= 'z'.toUInt8)

 /--
 tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
@@ -43,106 +71,109 @@ tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
  / DIGIT / ALPHA
  ; Visible token characters used to build `token`.
 -/
-def tchar (ch : Char) : Bool :=
-  ch matches '!' | '#' | '$' | '%' | '&' | '\'' | '*' |
-    '+' | '-' | '.' | '^' | '_' | '`' | '|' | '~' ||
-    Char.isDigit ch || Char.isAlpha ch
-
-/--
-Checks if a character is a valid HTTP token character per RFC 9110 §5.6.2.
-
-token = 1*tchar
-/
-def token (ch : Char) : Bool :=
-  ch matches '!' | '#' | '$' | '%' | '&' | '\'' | '*' |
-    '+' | '-' | '.' | '^' | '_' | '`' | '|' | '~' ||
-    Char.isDigit ch || Char.isAlpha ch
+@[inline]
+def tchar (c : Char) : Bool :=
+  (c matches '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '.' | '^' | '_' | '`' | '|' | '~') ||
+  isDigitChar c ||
+  isAlphaChar c

 /--
 vchar = %x21-7E
 ; Visible (printing) ASCII characters.
 -/
-def vchar (ch : Char) : Bool :=
-  ch ≥ '!' && ch ≤ '~'
+@[inline]
+def vchar (c : Char) : Bool :=
+  c ≥ '!' ∧ c ≤ '~'

 /--
 qdtext = HTAB / SP / %x21 / %x23-5B / %x5D-7E
 ; ASCII-only variant (no obs-text).
 -/
-def qdtext (ch : Char) : Bool :=
-  ch matches '\t' | ' ' | '!' ||
-    ('#' ≤ ch && ch ≤ '[') ||
-    (']' ≤ ch && ch ≤ '~')
+@[inline]
+def qdtext (c : Char) : Bool :=
+  (c matches '\t' | ' ' | '!') ||
+  ('#' ≤ c ∧ c ≤ '[') ||
+  (']' ≤ c ∧ c ≤ '~')

 /--
-quoted-pair = "\" ( HTAB / SP / VCHAR )
+quoted-pair = "\\" ( HTAB / SP / VCHAR )
 ; ASCII-only variant (no obs-text).
 -/
-def quotedPairChar (ch : Char) : Bool :=
-  ch matches '\t' | ' ' || vchar ch
+@[inline]
+def quotedPairChar (c : Char) : Bool :=
+  (c matches '\t' | ' ') || vchar c

 /--
 quoted-string body character class:
 ( qdtext / quoted-pair payload ) in ASCII-only mode.
 -/
-def quotedStringChar (ch : Char) : Bool :=
-  qdtext ch || quotedPairChar ch
+@[inline]
+def quotedStringChar (c : Char) : Bool :=
+  qdtext c || quotedPairChar c

 /--
 field-vchar = VCHAR
 ; ASCII-only variant (no obs-text).
 -/
-def fieldVchar (ch : Char) : Bool :=
-  vchar ch
+@[inline]
+def fieldVchar (c : Char) : Bool :=
+  vchar c

 /--
 field-content character class:
 field-vchar / SP / HTAB
 ; ASCII-only variant (no obs-text).
 -/
-def fieldContent (ch : Char) : Bool :=
-  fieldVchar ch || ch matches ' ' | '\t'
+@[inline]
+def fieldContent (c : Char) : Bool :=
+  fieldVchar c || (c matches ' ' | '\t')

 /--
 ctext = HTAB / SP / %x21-27 / %x2A-5B / %x5D-7E
 ; ASCII-only variant (no obs-text).
 -/
-def ctext (ch : Char) : Bool :=
-  ch matches '\t' | ' ' ||
-    ('!' ≤ ch && ch ≤ '\'') ||
-    ('*' ≤ ch && ch ≤ '[') ||
-    (']' ≤ ch && ch ≤ '~')
+@[inline]
+def ctext (c : Char) : Bool :=
+  (c matches '\t' | ' ') ||
+  ('!' ≤ c ∧ c ≤ '\'') ||
+  ('*' ≤ c ∧ c ≤ '[') ||
+  (']' ≤ c ∧ c ≤ '~')

 /--
 etagc = "!" / %x23-7E
 ; ASCII-only variant (no obs-text).
 -/
-def etagc (ch : Char) : Bool :=
-  ch matches '!' || ('#' ≤ ch && ch ≤ '~')
+@[inline]
+def etagc (c : Char) : Bool :=
+  c = '!' || ('#' ≤ c ∧ c ≤ '~')

 /--
 OWS = *( SP / HTAB )  (character class only)
 -/
-def ows (ch : Char) : Bool :=
-  ch matches ' ' | '\t'
+@[inline]
+def ows (c : Char) : Bool :=
+  c matches ' ' | '\t'

 /--
 BWS = OWS (character class alias)
 -/
-def bws (ch : Char) : Bool :=
-  ows ch
+@[inline]
+def bws (c : Char) : Bool :=
+  ows c

 /--
 RWS = 1*( SP / HTAB ) (character class is identical to `ows`)
 -/
-def rws (ch : Char) : Bool :=
-  ows ch
+@[inline]
+def rws (c : Char) : Bool :=
+  ows c

 /--
 obs-text = %x80-FF (and higher Unicode scalar values in this library's `Char` model).
 -/
-def obsText (ch : Char) : Bool :=
-  0x80 ≤ ch.toNat
+@[inline]
+def obsText (c : Char) : Bool :=
+  0x80 ≤ c.toNat

 /--
 reason-phrase character class:
@@ -151,103 +182,114 @@ HTAB / SP / VCHAR

 Reference: https://httpwg.org/specs/rfc9110.html#reason.phrase
 -/
-def reasonPhraseChar (ch : Char) : Bool :=
-  ch matches '\t' | ' ' || vchar ch
-
-/--
-Checks if a byte is a hexadecimal digit (0-9, a-f, or A-F).
-/
-@[expose]
-def isHexDigit (ch : Char) : Bool :=
-  (ch ≥ '0' && ch ≤ '9') ||
-  (ch ≥ 'a' && ch ≤ 'f') ||
-  (ch ≥ 'A' && ch ≤ 'F')
+@[inline]
+def reasonPhraseChar (c : Char) : Bool :=
+  (c matches '\t' | ' ') || vchar c

 /--
 Checks if a character is a hexadecimal digit (0-9, a-f, or A-F).
 -/
-@[expose]
-def isHexDigitByte (ch : Char) : Bool :=
-  isHexDigit ch
+@[inline, expose]
+def isHexDigit (c : Char) : Bool :=
+  (c matches 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F') ||
+  isDigitChar c

 /--
-Checks if `c` is an ASCII alphanumeric character.
+Checks if a byte is a hexadecimal digit (0-9, a-f, or A-F).
 -/
@[inline, expose]
-def isAsciiAlphaNumChar (ch : Char) : Bool :=
-  isAscii ch && Char.isAlphanum ch
+def isHexDigitByte (c : UInt8) : Bool :=
+  (c ≥ '0'.toUInt8 && c ≤ '9'.toUInt8) ||
+  (c ≥ 'a'.toUInt8 && c ≤ 'f'.toUInt8) ||
+  (c ≥ 'A'.toUInt8 && c ≤ 'F'.toUInt8)
+
+/--
+Checks if a byte is an alphanumeric digit (0-9, a-z, or A-Z).
+-/
+@[inline, expose]
+def isAlphaNum (c : UInt8) : Bool :=
+  (c ≥ '0'.toUInt8 && c ≤ '9'.toUInt8) ||
+  (c ≥ 'a'.toUInt8 && c ≤ 'z'.toUInt8) ||
+  (c ≥ 'A'.toUInt8 && c ≤ 'Z'.toUInt8)
+
+/--
+Checks whether `c` is an ASCII alphanumeric character.
+-/
+@[inline, expose]
+def isAsciiAlphaNumChar (c : Char) : Bool :=
+  isAscii c && (isDigitChar c || isAlphaChar c)

 /--
 Checks if a character is valid after the first character of a URI scheme.
 Valid characters are ASCII alphanumeric, `+`, `-`, and `.`.
 -/
-@[expose]
-def isValidSchemeChar (ch : Char) : Bool :=
-  isAsciiAlphaNumChar ch || ch matches '+' | '-' | '.'
+@[inline, expose]
+def isValidSchemeChar (c : Char) : Bool :=
+  isAsciiAlphaNumChar c || (c matches '+' | '-' | '.')

 /--
 Checks if a character is valid for use in a domain name.
 Valid characters are ASCII alphanumeric, hyphens, and dots.
 -/
-@[expose]
-def isValidDomainNameChar (ch : Char) : Bool :=
-  isAsciiAlphaNumChar ch || ch matches '-' | '.'
+@[inline, expose]
+def isValidDomainNameChar (c : Char) : Bool :=
+  isAsciiAlphaNumChar c || (c matches '-' | '.')

 /--
-Checks if a character is an unreserved character according to RFC 3986. Unreserved characters are:
+Checks if a byte is an unreserved character according to RFC 3986. Unreserved characters are:
 alphanumeric, hyphen, period, underscore, and tilde.
 -/
-@[expose]
-def isUnreserved (ch : Char) : Bool :=
-  Char.isAlphanum ch || ch matches '-' | '.' | '_' | '~'
-
+@[inline, expose]
+def isUnreserved (c : UInt8) : Bool :=
+  isAlphaNum c ||
+  (c = '-'.toUInt8 || c = '.'.toUInt8 || c = '_'.toUInt8 || c = '~'.toUInt8)

 /--
-Checks if a character is a sub-delimiter character according to RFC 3986.
+Checks if a byte is a sub-delimiter character according to RFC 3986.
 Sub-delimiters are: `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;`, `=`.
 -/
-@[expose]
-def isSubDelims (ch : Char) : Bool :=
-  ch matches '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '='
+@[inline, expose]
+def isSubDelims (c : UInt8) : Bool :=
+  c = '!'.toUInt8 || c = '$'.toUInt8 || c = '&'.toUInt8 || c = ('\'' : Char).toUInt8 ||
+  c = '('.toUInt8 || c = ')'.toUInt8 || c = '*'.toUInt8 || c = '+'.toUInt8 ||
+  c = ','.toUInt8 || c = ';'.toUInt8 || c = '='.toUInt8

 /--
-Checks if a character is a valid path character (`pchar`) according to RFC 3986.
+Checks if a byte is a valid path character (`pchar`) according to RFC 3986.
 `pchar = unreserved / pct-encoded / sub-delims / ":" / "@"`

 Note: The percent-encoding (`pct-encoded`) is handled separately by `isEncodedChar`,
 so this predicate only covers the non-percent characters.
 -/
-@[expose]
-def isPChar (ch : Char) : Bool :=
-  isUnreserved ch || isSubDelims ch || ch matches ':' | '@'
-
+@[inline, expose]
+def isPChar (c : UInt8) : Bool :=
+  isUnreserved c || isSubDelims c || c = ':'.toUInt8 || c = '@'.toUInt8

 /--
-Checks if a character is a valid character in a URI query component according to RFC 3986.
+Checks if a byte is a valid character in a URI query component according to RFC 3986.
 `query = *( pchar / "/" / "?" )`
 -/
-@[expose]
-def isQueryChar (ch : Char) : Bool :=
-  isPChar ch || ch matches '/' | '?'
-
+@[inline, expose]
+def isQueryChar (c : UInt8) : Bool :=
+  isPChar c || c = '/'.toUInt8 || c = '?'.toUInt8

 /--
-Checks if a character is a valid character in a URI fragment component according to RFC 3986.
+Checks if a byte is a valid character in a URI fragment component according to RFC 3986.
 `fragment = *( pchar / "/" / "?" )`
 -/
-@[expose]
-def isFragmentChar (ch : Char) : Bool :=
-  isPChar ch || ch matches '/' | '?'
+@[inline, expose]
+def isFragmentChar (c : UInt8) : Bool :=
+  isPChar c || c = '/'.toUInt8 || c = '?'.toUInt8

 /--
-Checks if a character is a valid character in a URI userinfo component according to RFC 3986.
+Checks if a byte is a valid character in a URI userinfo component according to RFC 3986.
 `userinfo = *( unreserved/ sub-delims / ":" )`

 Note: It avoids the pct-encoded of the original grammar because it is used with `Encoding.lean`
 that provides it.
 -/
-@[expose]
-def isUserInfoChar (ch : Char) : Bool :=
-  isUnreserved ch || isSubDelims ch || ch matches ':'
+@[inline, expose]
+def isUserInfoChar (c : UInt8) : Bool :=
+  isUnreserved c || isSubDelims c || c = ':'.toUInt8

 end Std.Http.Internal.Char