Compare commits

...

1 Commits

Author SHA1 Message Date
Sofia Rodrigues
13435ed957 feat: more functions in the bytearray parser and move .eof out 2025-07-03 14:03:05 -03:00
4 changed files with 250 additions and 45 deletions

View File

@@ -89,7 +89,7 @@ where
upToWs (nonempty : Bool) : Parser String := fun it =>
let it' := it.find fun c => c.isWhitespace
if nonempty && it'.pos == it.pos then
.error it' "Expected a nonempty string"
.error it' (.other "Expected a nonempty string")
else
.success it' (it.extract it')
@@ -176,7 +176,7 @@ private abbrev ValidationM := Parsec ValidationState
private def ValidationM.run (p : ValidationM α) (input : String) : Except (Nat × String) α :=
match p (.ofSource input) with
| .success _ res => Except.ok res
| .error s err => Except.error (s.getLineNumber, err)
| .error s err => Except.error (s.getLineNumber, toString err)
/--
Matches `p` as many times as possible, followed by EOF. If `p` cannot be matched prior to the end
@@ -286,7 +286,7 @@ where
labelingExampleErrors {α} (header : String) (x : ValidationM α) : ValidationM α := fun s =>
match x s with
| res@(.success ..) => res
| .error s' msg => .error s' s!"Example '{header}' is malformed: {msg}"
| .error s' msg => .error s' (.other s!"Example '{header}' is malformed: {msg}")
/--
If `line` is a level-`level` header and, if `title?` is non-`none`, its title is `title?`,

View File

@@ -1,24 +1,47 @@
/-
Copyright (c) 2021 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Dany Fabian, Henrik Böving
Author: Dany Fabian, Henrik Böving, Sofia Rodrigues
-/
module
prelude
import Init.NotationExtra
import Init.Data.ToString.Macro
public import Init.NotationExtra
public import Init.Data.ToString.Macro
namespace Std.Internal
public section
namespace Std
namespace Internal
namespace Parsec
/--
Error type for the `ParseResult`. It separates `eof` from the rest of the errors in order to
improve the error handling for this case in parsers that can receive incomplete data and then reparse it.
-/
inductive Error where
| eof
| other (s : String)
deriving Repr
instance : ToString Error where
toString
| .eof => "unexpected end of input"
| .other s => s
/--
The result of parsing some string.
-/
inductive ParseResult (α : Type) (ι : Type) where
| success (pos : ι) (res : α)
| error (pos : ι) (err : String)
| error (pos : ι) (err : Error)
deriving Repr
end Parsec
def Parsec (ι : Type) (α : Type) : Type := ι Parsec.ParseResult α ι
@[expose]
def Parsec (ι : Type) (α : Type) : Type :=
ι Parsec.ParseResult α ι
namespace Parsec
@@ -34,26 +57,21 @@ variable {α : Type} {ι : Type} {elem : Type} {idx : Type}
variable [DecidableEq idx] [DecidableEq elem] [Input ι elem idx]
instance : Inhabited (Parsec ι α) where
default := fun it => .error it ""
default := fun it => ParseResult.error it (.other "")
@[always_inline, inline]
protected def pure (a : α) : Parsec ι α := fun it =>
.success it a
@[always_inline, inline]
def bind {α β : Type} (f : Parsec ι α) (g : α Parsec ι β) : Parsec ι β := fun it =>
protected def bind {α β : Type} (f : Parsec ι α) (g : α Parsec ι β) : Parsec ι β := fun it =>
match f it with
| .success rem a => g a rem
| .error pos msg => .error pos msg
@[always_inline]
instance : Monad (Parsec ι) where
pure := Parsec.pure
bind := Parsec.bind
@[always_inline, inline]
def fail (msg : String) : Parsec ι α := fun it =>
.error it msg
.error it (.other msg)
@[inline]
def tryCatch (p : Parsec ι α) (csuccess : α Parsec ι β) (cerror : Unit Parsec ι β)
@@ -64,6 +82,11 @@ def tryCatch (p : Parsec ι α) (csuccess : α → Parsec ι β) (cerror : Unit
-- We assume that it.s never changes as the `Parsec` monad only modifies `it.pos`.
if Input.pos it = Input.pos rem then cerror () rem else .error rem err
@[always_inline]
instance : Monad (Parsec ι) where
pure := Parsec.pure
bind := Parsec.bind
@[always_inline, inline]
def orElse (p : Parsec ι α) (q : Unit Parsec ι α) : Parsec ι α :=
tryCatch p pure q
@@ -79,12 +102,10 @@ instance : Alternative (Parsec ι) where
failure := fail ""
orElse := orElse
def expectedEndOfInput := "expected end of input"
@[inline]
def eof : Parsec ι Unit := fun it =>
if Input.hasNext it then
.error it expectedEndOfInput
.error it .eof
else
.success it ()
@@ -102,8 +123,9 @@ def many (p : Parsec ι α) : Parsec ι <| Array α := manyCore p #[]
@[inline]
def many1 (p : Parsec ι α) : Parsec ι <| Array α := do manyCore p #[ p]
def unexpectedEndOfInput := "unexpected end of input"
/--
Gets the next input element.
-/
@[inline]
def any : Parsec ι elem := fun it =>
if h : Input.hasNext it then
@@ -111,19 +133,28 @@ def any : Parsec ι elem := fun it =>
let it' := Input.next' it h
.success it' c
else
.error it unexpectedEndOfInput
.error it .eof
/--
Checks if the next input element matches some condition.
-/
@[inline]
def satisfy (p : elem Bool) : Parsec ι elem := attempt do
let c any
if p c then return c else fail "condition not satisfied"
/--
Fails if `p` succeeds, otherwise succeeds without consuming input.
-/
@[inline]
def notFollowedBy (p : Parsec ι α) : Parsec ι Unit := fun it =>
match p it with
| .success _ _ => .error it ""
| .success _ _ => .error it (.other "")
| .error _ _ => .success it ()
/--
Peeks at the next element, returns `some` if exists else `none`, does not consume input.
-/
@[inline]
def peek? : Parsec ι (Option elem) := fun it =>
if h : Input.hasNext it then
@@ -131,13 +162,32 @@ def peek? : Parsec ι (Option elem) := fun it =>
else
.success it none
/--
Peeks at the next element, returns `some elem` if it satisfies `p`, else `none`. Does not consume input.
-/
@[inline]
def peekWhen? (p : elem Bool) : Parsec ι (Option elem) := do
let some data peek?
| return none
if p data then
return some data
else
return none
/--
Peeks at the next element, errors on EOF, does not consume input.
-/
@[inline]
def peek! : Parsec ι elem := fun it =>
if h : Input.hasNext it then
.success it (Input.curr' it h)
else
.error it unexpectedEndOfInput
.error it .eof
/--
Peeks at the next element or returns a default if at EOF, does not consume input.
-/
@[inline]
def peekD (default : elem) : Parsec ι elem := fun it =>
if h : Input.hasNext it then
@@ -145,23 +195,37 @@ def peekD (default : elem) : Parsec ι elem := fun it =>
else
.success it default
/--
Consumes one element if available, otherwise errors on EOF.
-/
@[inline]
def skip : Parsec ι Unit := fun it =>
if h : Input.hasNext it then
.success (Input.next' it h) ()
else
.error it unexpectedEndOfInput
.error it .eof
/--
Parses zero or more chars with `p`, accumulates into a string.
-/
@[specialize]
partial def manyCharsCore (p : Parsec ι Char) (acc : String) : Parsec ι String :=
tryCatch p (manyCharsCore p <| acc.push ·) (fun _ => pure acc)
/--
Parses zero or more chars with `p` into a string.
-/
@[inline]
def manyChars (p : Parsec ι Char) : Parsec ι String := manyCharsCore p ""
def manyChars (p : Parsec ι Char) : Parsec ι String := do
manyCharsCore p ""
/--
Parses one or more chars with `p` into a string, errors if none.
-/
@[inline]
def many1Chars (p : Parsec ι Char) : Parsec ι String := do manyCharsCore p ( p).toString
def many1Chars (p : Parsec ι Char) : Parsec ι String := do
manyCharsCore p ( p).toString
end Parsec
end Std.Internal
end Internal
end Std

View File

@@ -3,12 +3,17 @@ Copyright (c) 2024 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Henrik Böving
-/
prelude
import Std.Internal.Parsec.Basic
import Init.Data.ByteArray.Basic
import Init.Data.String.Extra
module
namespace Std.Internal
prelude
public import Std.Internal.Parsec.Basic
public import Init.Data.ByteArray.Basic
public import Init.Data.String.Extra
public section
namespace Std
namespace Internal
namespace Parsec
namespace ByteArray
@@ -22,27 +27,47 @@ instance : Input ByteArray.Iterator UInt8 Nat where
abbrev Parser (α : Type) : Type := Parsec ByteArray.Iterator α
/--
Run a `Parser` on a `ByteArray`, returns either the result or an error string with offset.
-/
protected def Parser.run (p : Parser α) (arr : ByteArray) : Except String α :=
match p arr.iter with
| .success _ res => Except.ok res
| .error it err => Except.error s!"offset {repr it.pos}: {err}"
| .error it (.other err) => Except.error s!"offset {repr it.pos}: {err}"
| .error it .eof => Except.error s!"offset {repr it.pos}: end of file"
/--
Parse a single byte equal to `b`, fails if different.
-/
@[inline]
def pbyte (b : UInt8) : Parser UInt8 := attempt do
if ( any) = b then pure b else fail s!"expected: '{b}'"
/--
Skip a single byte equal to `b`, fails if different.
-/
@[inline]
def skipByte (b : UInt8) : Parser Unit := pbyte b *> pure ()
def skipByte (b : UInt8) : Parser Unit :=
pbyte b *> pure ()
/--
Skip a sequence of bytes equal to the given `ByteArray`.
-/
def skipBytes (arr : ByteArray) : Parser Unit := do
for b in arr do
skipByte b
/--
Parse a string by matching its UTF-8 bytes, returns the string on success.
-/
@[inline]
def pstring (s : String) : Parser String := do
skipBytes s.toUTF8
return s
/--
Skip a string by matching its UTF-8 bytes.
-/
@[inline]
def skipString (s : String) : Parser Unit := pstring s *> pure ()
@@ -59,14 +84,24 @@ Skip a `Char` that can be represented in 1 byte. If `c` uses more than 1 byte it
@[inline]
def skipByteChar (c : Char) : Parser Unit := skipByte c.toUInt8
/--
Parse an ASCII digit `0-9` as a `Char`.
-/
@[inline]
def digit : Parser Char := attempt do
let b any
if '0'.toUInt8 b b '9'.toUInt8 then return Char.ofUInt8 b else fail s!"digit expected"
/--
Convert a byte representing `'0'..'9'` to a `Nat`.
-/
@[inline]
private def digitToNat (b : UInt8) : Nat := (b - '0'.toUInt8).toNat
private def digitToNat (b : UInt8) : Nat :=
(b - '0'.toUInt8).toNat
/--
Parse zero or more ASCII digits into a `Nat`, continuing until non-digit or EOF.
-/
@[inline]
private partial def digitsCore (acc : Nat) : Parser Nat := fun it =>
/-
@@ -88,11 +123,17 @@ where
else
(acc, it)
/--
Parse one or more ASCII digits into a `Nat`.
-/
@[inline]
def digits : Parser Nat := do
let d digit
digitsCore (digitToNat d.toUInt8)
/--
Parse a hex digit `0-9`, `a-f`, or `A-F` as a `Char`.
-/
@[inline]
def hexDigit : Parser Char := attempt do
let b any
@@ -100,6 +141,20 @@ def hexDigit : Parser Char := attempt do
('a'.toUInt8 b b 'f'.toUInt8)
('A'.toUInt8 b b 'F'.toUInt8) then return Char.ofUInt8 b else fail s!"hex digit expected"
/--
Parse an octal digit `0-7` as a `Char`.
-/
@[inline]
def octDigit : Parser Char := attempt do
let b any
if '0'.toUInt8 b b '7'.toUInt8 then
return Char.ofUInt8 b
else
fail s!"octal digit expected"
/--
Parse an ASCII letter `a-z` or `A-Z` as a `Char`.
-/
@[inline]
def asciiLetter : Parser Char := attempt do
let b any
@@ -118,17 +173,65 @@ private partial def skipWs (it : ByteArray.Iterator) : ByteArray.Iterator :=
else
it
/--
Skip whitespace: tabs, newlines, carriage returns, and spaces.
-/
@[inline]
def ws : Parser Unit := fun it =>
.success (skipWs it) ()
/--
Parse `n` bytes from the input into a `ByteArray`, errors if not enough bytes.
-/
def take (n : Nat) : Parser ByteArray := fun it =>
let subarr := it.array.extract it.idx (it.idx + n)
if subarr.size != n then
.error it s!"expected: {n} bytes"
.error it .eof
else
.success (it.forward n) subarr
/--
Parses while a predicate is satisfied.
-/
@[inline]
partial def takeWhile (pred : UInt8 Bool) : Parser ByteArray :=
fun it =>
let rec findEnd (count : Nat) (iter : ByteArray.Iterator) : (Nat × ByteArray.Iterator) :=
if ¬iter.hasNext then (count, iter)
else if pred iter.curr then findEnd (count + 1) iter.next
else (count, iter)
let (length, newIt) := findEnd 0 it
.success newIt (it.array.extract it.idx (it.idx + length))
/--
Parses until a predicate is satisfied (exclusive).
-/
@[inline]
def takeUntil (pred : UInt8 Bool) : Parser ByteArray :=
takeWhile (fun b => ¬pred b)
/--
Skips while a predicate is satisfied.
-/
@[inline]
partial def skipWhile (pred : UInt8 Bool) : Parser Unit :=
fun it =>
let rec findEnd (count : Nat) (iter : ByteArray.Iterator) : ByteArray.Iterator :=
if ¬iter.hasNext then iter
else if pred iter.curr then findEnd (count + 1) iter.next
else iter
.success (findEnd 0 it) ()
/--
Skips until a predicate is satisfied.
-/
@[inline]
def skipUntil (pred : UInt8 Bool) : Parser Unit :=
skipWhile (fun b => ¬pred b)
end ByteArray
end Parsec
end Std.Internal
end Internal
end Std

View File

@@ -3,8 +3,12 @@ Copyright (c) 2021 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Dany Fabian, Henrik Böving
-/
module
prelude
import Std.Internal.Parsec.Basic
public import Std.Internal.Parsec.Basic
public section
namespace Std.Internal
namespace Parsec
@@ -20,34 +24,55 @@ instance : Input String.Iterator Char String.Pos where
abbrev Parser (α : Type) : Type := Parsec String.Iterator α
/--
Run a `Parser` on a `String`, returns either the result or an error string with offset.
-/
protected def Parser.run (p : Parser α) (s : String) : Except String α :=
match p s.mkIterator with
| .success _ res => Except.ok res
| .error it err => Except.error s!"offset {repr it.i.byteIdx}: {err}"
| .error it (.other err) => Except.error s!"offset {repr it.pos}: {err}"
| .error it .eof => Except.error s!"offset {repr it.pos}: end of file"
/-- Parses the given string. -/
/--
Parses the given string.
-/
def pstring (s : String) : Parser String := fun it =>
let substr := it.extract (it.forward s.length)
if substr = s then
.success (it.forward s.length) substr
else
.error it s!"expected: {s}"
.error it (.other s!"expected: {s}")
/--
Skips the given string.
-/
@[inline]
def skipString (s : String) : Parser Unit := pstring s *> pure ()
/--
Parses the given char.
-/
@[inline]
def pchar (c : Char) : Parser Char := attempt do
if ( any) = c then pure c else fail s!"expected: '{c}'"
/--
Skips the given char.
-/
@[inline]
def skipChar (c : Char) : Parser Unit := pchar c *> pure ()
/--
Parse an ASCII digit `0-9` as a `Char`.
-/
@[inline]
def digit : Parser Char := attempt do
let c any
if '0' c c '9' then return c else fail s!"digit expected"
/--
Convert a byte representing `'0'..'9'` to a `Nat`.
-/
@[inline]
private def digitToNat (b : Char) : Nat := b.toNat - '0'.toNat
@@ -72,11 +97,17 @@ where
else
(acc, it)
/--
Parse one or more ASCII digits into a `Nat`.
-/
@[inline]
def digits : Parser Nat := do
let d digit
digitsCore (digitToNat d)
/--
Parse a hex digit `0-9`, `a-f`, or `A-F` as a `Char`.
-/
@[inline]
def hexDigit : Parser Char := attempt do
let c any
@@ -84,6 +115,9 @@ def hexDigit : Parser Char := attempt do
('a' c c 'f')
('A' c c 'F') then return c else fail s!"hex digit expected"
/--
Parse an ASCII letter `a-z` or `A-Z` as a `Char`.
-/
@[inline]
def asciiLetter : Parser Char := attempt do
let c any
@@ -99,14 +133,18 @@ private partial def skipWs (it : String.Iterator) : String.Iterator :=
else
it
/--
Skip whitespace: tabs, newlines, carriage returns, and spaces.
-/
@[inline]
def ws : Parser Unit := fun it =>
.success (skipWs it) ()
def take (n : Nat) : Parser String := fun it =>
let substr := it.extract (it.forward n)
if substr.length != n then
.error it s!"expected: {n} codepoints"
.error it .eof
else
.success (it.forward n) substr