Compare commits

...

5 Commits

Author SHA1 Message Date
Henrik Böving
d7fac39841 feat: ByteArray.Iterator support for Parsec 2024-07-18 22:42:51 +02:00
Henrik Böving
331a5d079f feat: ByteArray.Iterator 2024-07-18 22:42:49 +02:00
Henrik Böving
da6df60c22 fix: breakages caused by generalized Parsec 2024-07-17 13:55:32 +02:00
Henrik Böving
6eadcf61bb feat: generalize Parsec over input type 2024-07-17 13:49:25 +02:00
Henrik Böving
b0d24e01fb refactor: split out String related parts of Parsec 2024-07-17 13:49:22 +02:00
9 changed files with 558 additions and 281 deletions

View File

@@ -187,6 +187,121 @@ def foldlM {β : Type v} {m : Type v → Type w} [Monad m] (f : β → UInt8 →
def foldl {β : Type v} (f : β UInt8 β) (init : β) (as : ByteArray) (start := 0) (stop := as.size) : β :=
Id.run <| as.foldlM f init start stop
/-- Iterator over the bytes (`UInt8`) of a `ByteArray`.
Typically created by `arr.iter`, where `arr` is a `ByteArray`.
An iterator is *valid* if the position `i` is *valid* for the array `arr`, meaning `0 ≤ i ≤ arr.size`
Most operations on iterators return arbitrary values if the iterator is not valid. The functions in
the `ByteArray.Iterator` API should rule out the creation of invalid iterators, with two exceptions:
- `Iterator.next iter` is invalid if `iter` is already at the end of the array (`iter.atEnd` is
`true`)
- `Iterator.forward iter n`/`Iterator.nextn iter n` is invalid if `n` is strictly greater than the
number of remaining bytes.
-/
structure Iterator where
/-- The array the iterator is for. -/
array : ByteArray
/-- The current position.
This position is not necessarily valid for the array, for instance if one keeps calling
`Iterator.next` when `Iterator.atEnd` is true. If the position is not valid, then the
current byte is `(default : UInt8)`. -/
idx : Nat
deriving Inhabited
/-- Creates an iterator at the beginning of an array. -/
def mkIterator (arr : ByteArray) : Iterator :=
arr, 0
@[inherit_doc mkIterator]
abbrev iter := mkIterator
/-- The size of an array iterator is the number of bytes remaining. -/
instance : SizeOf Iterator where
sizeOf i := i.array.size - i.idx
theorem Iterator.sizeOf_eq (i : Iterator) : sizeOf i = i.array.size - i.idx :=
rfl
namespace Iterator
/-- Number of bytes remaining in the iterator. -/
def remainingBytes : Iterator Nat
| arr, i => arr.size - i
@[inherit_doc Iterator.idx]
def pos := Iterator.idx
/-- The byte at the current position.
On an invalid position, returns `(default : UInt8)`. -/
@[inline]
def curr : Iterator UInt8
| arr, i =>
if h:i < arr.size then
arr[i]'h
else
default
/-- Moves the iterator's position forward by one byte, unconditionally.
It is only valid to call this function if the iterator is not at the end of the array, *i.e.*
`Iterator.atEnd` is `false`; otherwise, the resulting iterator will be invalid. -/
@[inline]
def next : Iterator Iterator
| arr, i => arr, i + 1
/-- Decreases the iterator's position.
If the position is zero, this function is the identity. -/
@[inline]
def prev : Iterator Iterator
| arr, i => arr, i - 1
/-- True if the iterator is past the array's last byte. -/
@[inline]
def atEnd : Iterator Bool
| arr, i => i arr.size
/-- True if the iterator is not past the array's last byte. -/
@[inline]
def hasNext : Iterator Bool
| arr, i => i < arr.size
/-- True if the position is not zero. -/
@[inline]
def hasPrev : Iterator Bool
| _, i => i > 0
/-- Moves the iterator's position to the end of the array.
Note that `i.toEnd.atEnd` is always `true`. -/
@[inline]
def toEnd : Iterator Iterator
| arr, _ => arr, arr.size
/-- Moves the iterator's position several bytes forward.
The resulting iterator is only valid if the number of bytes to skip is less than or equal to
the number of bytes left in the iterator. -/
@[inline]
def forward : Iterator Nat Iterator
| arr, i, f => arr, i + f
@[inherit_doc forward, inline]
def nextn : Iterator Nat Iterator := forward
/-- Moves the iterator's position several bytes back.
If asked to go back more bytes than available, stops at the beginning of the array. -/
@[inline]
def prevn : Iterator Nat Iterator
| arr, i, f => arr, i - f
end Iterator
end ByteArray
def List.toByteArray (bs : List UInt8) : ByteArray :=

View File

@@ -12,10 +12,11 @@ import Lean.Data.RBMap
namespace Lean.Json.Parser
open Lean.Parsec
open Lean.Parsec.String
@[inline]
def hexChar : Parsec Nat := do
let c anyChar
def hexChar : Parser Nat := do
let c any
if '0' c c '9' then
pure $ c.val.toNat - '0'.val.toNat
else if 'a' c c 'f' then
@@ -25,8 +26,8 @@ def hexChar : Parsec Nat := do
else
fail "invalid hex character"
def escapedChar : Parsec Char := do
let c anyChar
def escapedChar : Parser Char := do
let c any
match c with
| '\\' => return '\\'
| '"' => return '"'
@@ -41,13 +42,13 @@ def escapedChar : Parsec Char := do
return Char.ofNat $ 4096*u1 + 256*u2 + 16*u3 + u4
| _ => fail "illegal \\u escape"
partial def strCore (acc : String) : Parsec String := do
partial def strCore (acc : String) : Parser String := do
let c peek!
if c = '"' then -- "
skip
return acc
else
let c anyChar
let c any
if c = '\\' then
strCore (acc.push ( escapedChar))
-- as to whether c.val > 0xffff should be split up and encoded with multiple \u,
@@ -58,9 +59,9 @@ partial def strCore (acc : String) : Parsec String := do
else
fail "unexpected character in string"
def str : Parsec String := strCore ""
def str : Parser String := strCore ""
partial def natCore (acc digits : Nat) : Parsec (Nat × Nat) := do
partial def natCore (acc digits : Nat) : Parser (Nat × Nat) := do
let some c peek? | return (acc, digits)
if '0' c c '9' then
skip
@@ -70,7 +71,7 @@ partial def natCore (acc digits : Nat) : Parsec (Nat × Nat) := do
return (acc, digits)
@[inline]
def lookahead (p : Char Prop) (desc : String) [DecidablePred p] : Parsec Unit := do
def lookahead (p : Char Prop) (desc : String) [DecidablePred p] : Parser Unit := do
let c peek!
if p c then
return ()
@@ -78,22 +79,22 @@ def lookahead (p : Char → Prop) (desc : String) [DecidablePred p] : Parsec Uni
fail <| "expected " ++ desc
@[inline]
def natNonZero : Parsec Nat := do
def natNonZero : Parser Nat := do
lookahead (fun c => '1' c c '9') "1-9"
let (n, _) natCore 0 0
return n
@[inline]
def natNumDigits : Parsec (Nat × Nat) := do
def natNumDigits : Parser (Nat × Nat) := do
lookahead (fun c => '0' c c '9') "digit"
natCore 0 0
@[inline]
def natMaybeZero : Parsec Nat := do
def natMaybeZero : Parser Nat := do
let (n, _) natNumDigits
return n
def num : Parsec JsonNumber := do
def num : Parser JsonNumber := do
let c peek!
let sign if c = '-' then
skip
@@ -132,10 +133,10 @@ def num : Parsec JsonNumber := do
else
return res
partial def arrayCore (anyCore : Parsec Json) (acc : Array Json) : Parsec (Array Json) := do
partial def arrayCore (anyCore : Parser Json) (acc : Array Json) : Parser (Array Json) := do
let hd anyCore
let acc' := acc.push hd
let c anyChar
let c any
if c = ']' then
ws
return acc'
@@ -145,12 +146,12 @@ partial def arrayCore (anyCore : Parsec Json) (acc : Array Json) : Parsec (Array
else
fail "unexpected character in array"
partial def objectCore (anyCore : Parsec Json) : Parsec (RBNode String (fun _ => Json)) := do
partial def objectCore (anyCore : Parser Json) : Parser (RBNode String (fun _ => Json)) := do
lookahead (fun c => c = '"') "\""; skip; -- "
let k strCore ""; ws
lookahead (fun c => c = ':') ":"; skip; ws
let v anyCore
let c anyChar
let c any
if c = '}' then
ws
return RBNode.singleton k v
@@ -161,7 +162,7 @@ partial def objectCore (anyCore : Parsec Json) : Parsec (RBNode String (fun _ =>
else
fail "unexpected character in object"
partial def anyCore : Parsec Json := do
partial def anyCore : Parser Json := do
let c peek!
if c = '[' then
skip; ws
@@ -203,7 +204,7 @@ partial def anyCore : Parsec Json := do
fail "unexpected input"
def any : Parsec Json := do
def any : Parser Json := do
ws
let res ← anyCore
eof

View File

@@ -4,181 +4,6 @@ Released under Apache 2.0 license as described in the file LICENSE.
Author: Dany Fabian
-/
prelude
import Init.NotationExtra
import Init.Data.ToString.Macro
namespace Lean
namespace Parsec
inductive ParseResult (α : Type) where
| success (pos : String.Iterator) (res : α)
| error (pos : String.Iterator) (err : String)
deriving Repr
end Parsec
def Parsec (α : Type) : Type := String.Iterator Lean.Parsec.ParseResult α
namespace Parsec
open ParseResult
instance (α : Type) : Inhabited (Parsec α) :=
λ it => error it ""
@[inline]
protected def pure (a : α) : Parsec α := λ it =>
success it a
@[inline]
def bind {α β : Type} (f : Parsec α) (g : α Parsec β) : Parsec β := λ it =>
match f it with
| success rem a => g a rem
| error pos msg => error pos msg
instance : Monad Parsec :=
{ pure := Parsec.pure, bind }
@[inline]
def fail (msg : String) : Parsec α := fun it =>
error it msg
@[inline]
def tryCatch (p : Parsec α)
(csuccess : α Parsec β)
(cerror : Unit Parsec β)
: Parsec β := fun it =>
match p it with
| .success rem a => csuccess a rem
| .error rem err =>
-- We assume that it.s never changes as the `Parsec` monad only modifies `it.pos`.
if it.pos = rem.pos then cerror () rem else .error rem err
@[inline]
def orElse (p : Parsec α) (q : Unit Parsec α) : Parsec α :=
tryCatch p pure q
@[inline]
def attempt (p : Parsec α) : Parsec α := λ it =>
match p it with
| success rem res => success rem res
| error _ err => error it err
instance : Alternative Parsec :=
{ failure := fail "", orElse }
protected def run (p : Parsec α) (s : String) : Except String α :=
match p s.mkIterator with
| Parsec.ParseResult.success _ res => Except.ok res
| Parsec.ParseResult.error it err => Except.error s!"offset {repr it.i.byteIdx}: {err}"
def expectedEndOfInput := "expected end of input"
@[inline]
def eof : Parsec Unit := fun it =>
if it.hasNext then
error it expectedEndOfInput
else
success it ()
@[specialize]
partial def manyCore (p : Parsec α) (acc : Array α) : Parsec $ Array α :=
tryCatch p (manyCore p <| acc.push ·) (fun _ => pure acc)
@[inline]
def many (p : Parsec α) : Parsec $ Array α := manyCore p #[]
@[inline]
def many1 (p : Parsec α) : Parsec $ Array α := do manyCore p #[p]
@[specialize]
partial def manyCharsCore (p : Parsec Char) (acc : String) : Parsec String :=
tryCatch p (manyCharsCore p <| acc.push ·) (fun _ => pure acc)
@[inline]
def manyChars (p : Parsec Char) : Parsec String := manyCharsCore p ""
@[inline]
def many1Chars (p : Parsec Char) : Parsec String := do manyCharsCore p (p).toString
/-- Parses the given string. -/
def pstring (s : String) : Parsec String := λ it =>
let substr := it.extract (it.forward s.length)
if substr = s then
success (it.forward s.length) substr
else
error it s!"expected: {s}"
@[inline]
def skipString (s : String) : Parsec Unit := pstring s *> pure ()
def unexpectedEndOfInput := "unexpected end of input"
@[inline]
def anyChar : Parsec Char := λ it =>
if it.hasNext then success it.next it.curr else error it unexpectedEndOfInput
@[inline]
def pchar (c : Char) : Parsec Char := attempt do
if (anyChar) = c then pure c else fail s!"expected: '{c}'"
@[inline]
def skipChar (c : Char) : Parsec Unit := pchar c *> pure ()
@[inline]
def digit : Parsec Char := attempt do
let c anyChar
if '0' c c '9' then return c else fail s!"digit expected"
@[inline]
def hexDigit : Parsec Char := attempt do
let c anyChar
if ('0' c c '9')
('a' c c 'f')
('A' c c 'F') then return c else fail s!"hex digit expected"
@[inline]
def asciiLetter : Parsec Char := attempt do
let c anyChar
if ('A' c c 'Z') ('a' c c 'z') then return c else fail s!"ASCII letter expected"
@[inline]
def satisfy (p : Char Bool) : Parsec Char := attempt do
let c anyChar
if p c then return c else fail "condition not satisfied"
@[inline]
def notFollowedBy (p : Parsec α) : Parsec Unit := λ it =>
match p it with
| success _ _ => error it ""
| error _ _ => success it ()
partial def skipWs (it : String.Iterator) : String.Iterator :=
if it.hasNext then
let c := it.curr
if c = '\u0009' c = '\u000a' c = '\u000d' c = '\u0020' then
skipWs it.next
else
it
else
it
@[inline]
def peek? : Parsec (Option Char) := fun it =>
if it.hasNext then
success it it.curr
else
success it none
@[inline]
def peek! : Parsec Char := do
let some c peek? | fail unexpectedEndOfInput
return c
@[inline]
def skip : Parsec Unit := fun it =>
success it.next ()
@[inline]
def ws : Parsec Unit := fun it =>
success (skipWs it) ()
end Parsec
import Lean.Data.Parsec.Basic
import Lean.Data.Parsec.String
import Lean.Data.Parsec.ByteArray

View File

@@ -0,0 +1,144 @@
/-
Copyright (c) 2021 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Dany Fabian, Henrik Böving
-/
prelude
import Init.NotationExtra
import Init.Data.ToString.Macro
namespace Lean
namespace Parsec
inductive ParseResult (α : Type) (ι : Type) where
| success (pos : ι) (res : α)
| error (pos : ι) (err : String)
deriving Repr
end Parsec
def Parsec (ι : Type) (α : Type) : Type := ι Lean.Parsec.ParseResult α ι
namespace Parsec
class Input (ι : Type) (elem : outParam Type) (idx : outParam Type) [DecidableEq idx] [DecidableEq elem] where
pos : ι idx
next : ι ι
curr : ι elem
hasNext : ι Bool
variable {α : Type} {ι : Type} {elem : Type} {idx : Type}
variable [DecidableEq idx] [DecidableEq elem] [Input ι elem idx]
instance : Inhabited (Parsec ι α) where
default := fun it => .error it ""
@[inline]
protected def pure (a : α) : Parsec ι α := fun it =>
.success it a
@[inline]
def bind {α β : Type} (f : Parsec ι α) (g : α Parsec ι β) : Parsec ι β := fun it =>
match f it with
| .success rem a => g a rem
| .error pos msg => .error pos msg
instance : Monad (Parsec ι) where
pure := Parsec.pure
bind := Parsec.bind
@[inline]
def fail (msg : String) : Parsec ι α := fun it =>
.error it msg
@[inline]
def tryCatch (p : Parsec ι α) (csuccess : α Parsec ι β) (cerror : Unit Parsec ι β)
: Parsec ι β := fun it =>
match p it with
| .success rem a => csuccess a rem
| .error rem err =>
-- We assume that it.s never changes as the `Parsec` monad only modifies `it.pos`.
if Input.pos it = Input.pos rem then cerror () rem else .error rem err
@[inline]
def orElse (p : Parsec ι α) (q : Unit Parsec ι α) : Parsec ι α :=
tryCatch p pure q
@[inline]
def attempt (p : Parsec ι α) : Parsec ι α := fun it =>
match p it with
| .success rem res => .success rem res
| .error _ err => .error it err
instance : Alternative (Parsec ι) where
failure := fail ""
orElse := orElse
def expectedEndOfInput := "expected end of input"
@[inline]
def eof : Parsec ι Unit := fun it =>
if Input.hasNext it then
.error it expectedEndOfInput
else
.success it ()
@[specialize]
partial def manyCore (p : Parsec ι α) (acc : Array α) : Parsec ι <| Array α :=
tryCatch p (manyCore p <| acc.push ·) (fun _ => pure acc)
@[inline]
def many (p : Parsec ι α) : Parsec ι <| Array α := manyCore p #[]
@[inline]
def many1 (p : Parsec ι α) : Parsec ι <| Array α := do manyCore p #[ p]
def unexpectedEndOfInput := "unexpected end of input"
@[inline]
def any : Parsec ι elem := fun it =>
if Input.hasNext it then
.success (Input.next it) (Input.curr it)
else
.error it unexpectedEndOfInput
@[inline]
def satisfy (p : elem Bool) : Parsec ι elem := attempt do
let c any
if p c then return c else fail "condition not satisfied"
@[inline]
def notFollowedBy (p : Parsec ι α) : Parsec ι Unit := fun it =>
match p it with
| .success _ _ => .error it ""
| .error _ _ => .success it ()
@[inline]
def peek? : Parsec ι (Option elem) := fun it =>
if Input.hasNext it then
.success it (Input.curr it)
else
.success it none
@[inline]
def peek! : Parsec ι elem := do
let some c peek? | fail unexpectedEndOfInput
return c
@[inline]
def skip : Parsec ι Unit := fun it =>
.success (Input.next it) ()
@[specialize]
partial def manyCharsCore (p : Parsec ι Char) (acc : String) : Parsec ι String :=
tryCatch p (manyCharsCore p <| acc.push ·) (fun _ => pure acc)
@[inline]
def manyChars (p : Parsec ι Char) : Parsec ι String := manyCharsCore p ""
@[inline]
def many1Chars (p : Parsec ι Char) : Parsec ι String := do manyCharsCore p ( p).toString
end Parsec

View File

@@ -0,0 +1,103 @@
/-
Copyright (c) 2024 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Henrik Böving
-/
prelude
import Lean.Data.Parsec.Basic
import Init.Data.ByteArray.Basic
import Init.Data.String.Extra
namespace Lean
namespace Parsec
namespace ByteArray
instance : Input ByteArray.Iterator UInt8 Nat where
pos it := it.pos
next it := it.next
curr it := it.curr
hasNext it := it.hasNext
abbrev Parser (α : Type) : Type := Parsec ByteArray.Iterator α
protected def Parser.run (p : Parser α) (arr : ByteArray) : Except String α :=
match p arr.iter with
| .success _ res => Except.ok res
| .error it err => Except.error s!"offset {repr it.pos}: {err}"
@[inline]
def pbyte (b : UInt8) : Parser UInt8 := attempt do
if ( any) = b then pure b else fail s!"expected: '{b}'"
@[inline]
def skipByte (b : UInt8) : Parser Unit := pbyte b *> pure ()
def skipBytes (arr : ByteArray) : Parser Unit := do
for b in arr do
skipByte b
@[inline]
def pstring (s : String) : Parser String := do
skipBytes s.toUTF8
return s
@[inline]
def skipString (s : String) : Parser Unit := pstring s *> pure ()
/--
Parse a `Char` that can be represented in 1 byte. If `c` uses more than 1 byte it is truncated.
-/
@[inline]
def pByteChar (c : Char) : Parser Char := attempt do
if ( any) = c.toUInt8 then pure c else fail s!"expected: '{c}'"
/--
Skip a `Char` that can be represented in 1 byte. If `c` uses more than 1 byte it is truncated.
-/
@[inline]
def skipByteChar (c : Char) : Parser Unit := skipByte c.toUInt8
@[inline]
def digit : Parser Char := attempt do
let b any
if '0'.toUInt8 b b '9'.toUInt8 then return Char.ofUInt8 b else fail s!"digit expected"
@[inline]
def hexDigit : Parser Char := attempt do
let b any
if ('0'.toUInt8 b b '9'.toUInt8)
('a'.toUInt8 b b 'f'.toUInt8)
('A'.toUInt8 b b 'F'.toUInt8) then return Char.ofUInt8 b else fail s!"hex digit expected"
@[inline]
def asciiLetter : Parser Char := attempt do
let b any
if ('A'.toUInt8 b b 'Z'.toUInt8) ('a'.toUInt8 b b 'z'.toUInt8) then
return Char.ofUInt8 b
else
fail s!"ASCII letter expected"
private partial def skipWs (it : ByteArray.Iterator) : ByteArray.Iterator :=
if it.hasNext then
let b := it.curr
if b = '\u0009'.toUInt8 b = '\u000a'.toUInt8 b = '\u000d'.toUInt8 b = '\u0020'.toUInt8 then
skipWs it.next
else
it
else
it
@[inline]
def ws : Parser Unit := fun it =>
.success (skipWs it) ()
def take (n : Nat) : Parser ByteArray := fun it =>
let subarr := it.array.extract it.idx (it.idx + n)
if subarr.size != n then
.error it s!"expected: {n} bytes"
else
.success (it.forward n) subarr
end ByteArray
end Parsec
end Lean

View File

@@ -0,0 +1,84 @@
/-
Copyright (c) 2021 Microsoft Corporation. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Author: Dany Fabian, Henrik Böving
-/
prelude
import Lean.Data.Parsec.Basic
namespace Lean
namespace Parsec
namespace String
instance : Input String.Iterator Char String.Pos where
pos it := it.pos
next it := it.next
curr it := it.curr
hasNext it := it.hasNext
abbrev Parser (α : Type) : Type := Parsec String.Iterator α
protected def Parser.run (p : Parser α) (s : String) : Except String α :=
match p s.mkIterator with
| .success _ res => Except.ok res
| .error it err => Except.error s!"offset {repr it.i.byteIdx}: {err}"
/-- Parses the given string. -/
def pstring (s : String) : Parser String := fun it =>
let substr := it.extract (it.forward s.length)
if substr = s then
.success (it.forward s.length) substr
else
.error it s!"expected: {s}"
@[inline]
def skipString (s : String) : Parser Unit := pstring s *> pure ()
@[inline]
def pchar (c : Char) : Parser Char := attempt do
if ( any) = c then pure c else fail s!"expected: '{c}'"
@[inline]
def skipChar (c : Char) : Parser Unit := pchar c *> pure ()
@[inline]
def digit : Parser Char := attempt do
let c any
if '0' c c '9' then return c else fail s!"digit expected"
@[inline]
def hexDigit : Parser Char := attempt do
let c any
if ('0' c c '9')
('a' c c 'f')
('A' c c 'F') then return c else fail s!"hex digit expected"
@[inline]
def asciiLetter : Parser Char := attempt do
let c any
if ('A' c c 'Z') ('a' c c 'z') then return c else fail s!"ASCII letter expected"
private partial def skipWs (it : String.Iterator) : String.Iterator :=
if it.hasNext then
let c := it.curr
if c = '\u0009' c = '\u000a' c = '\u000d' c = '\u0020' then
skipWs it.next
else
it
else
it
@[inline]
def ws : Parser Unit := fun it =>
.success (skipWs it) ()
def take (n : Nat) : Parser String := fun it =>
let substr := it.extract (it.forward n)
if substr.length != n then
.error it s!"expected: {n} codepoints"
else
.success (it.forward n) substr
end String
end Parsec
end Lean

View File

@@ -13,23 +13,24 @@ namespace Lean
namespace Xml
namespace Parser
open Lean.Parsec
open Parsec.ParseResult
open Lean.Parsec.String
abbrev LeanChar := Char
/-- consume a newline character sequence pretending, that we read '\n'. As per spec:
https://www.w3.org/TR/xml/#sec-line-ends -/
def endl : Parsec LeanChar := (skipString "\r\n" <|> skipChar '\r' <|> skipChar '\n') *> pure '\n'
def endl : Parser LeanChar := (skipString "\r\n" <|> skipChar '\r' <|> skipChar '\n') *> pure '\n'
def quote (p : Parsec α) : Parsec α :=
def quote (p : Parser α) : Parser α :=
skipChar '\'' *> p <* skipChar '\''
<|> skipChar '"' *> p <* skipChar '"'
/-- https://www.w3.org/TR/xml/#NT-Char -/
def Char : Parsec LeanChar :=
def Char : Parser LeanChar :=
(attempt do
let c anyChar
let c any
let cNat := c.toNat
if (0x20 cNat cNat 0xD7FF)
(0xE000 cNat cNat 0xFFFD)
@@ -37,11 +38,11 @@ def Char : Parsec LeanChar :=
<|> pchar '\t' <|> endl
/-- https://www.w3.org/TR/xml/#NT-S -/
def S : Parsec String :=
def S : Parser String :=
many1Chars (pchar ' ' <|> endl <|> pchar '\t')
/-- https://www.w3.org/TR/xml/#NT-Eq -/
def Eq : Parsec Unit :=
def Eq : Parser Unit :=
optional S *> skipChar '=' <* optional S
private def nameStartCharRanges : Array (Nat × Nat) :=
@@ -59,8 +60,8 @@ private def nameStartCharRanges : Array (Nat × Nat) :=
(0x10000, 0xEFFFF)]
/-- https://www.w3.org/TR/xml/#NT-NameStartChar -/
def NameStartChar : Parsec LeanChar := attempt do
let c anyChar
def NameStartChar : Parser LeanChar := attempt do
let c any
if ('A' c c 'Z') ('a' c c 'z') then pure c
else if c = ':' c = '_' then pure c
else
@@ -69,44 +70,44 @@ def NameStartChar : Parsec LeanChar := attempt do
else fail "expected a name character"
/-- https://www.w3.org/TR/xml/#NT-NameChar -/
def NameChar : Parsec LeanChar :=
def NameChar : Parser LeanChar :=
NameStartChar <|> digit <|> pchar '-' <|> pchar '.' <|> pchar '\xB7'
<|> satisfy (λ c => ('\u0300' c c '\u036F') ('\u203F' c c '\u2040'))
/-- https://www.w3.org/TR/xml/#NT-Name -/
def Name : Parsec String := do
def Name : Parser String := do
let x NameStartChar
manyCharsCore NameChar x.toString
/-- https://www.w3.org/TR/xml/#NT-VersionNum -/
def VersionNum : Parsec Unit :=
def VersionNum : Parser Unit :=
skipString "1." <* (many1 digit)
/-- https://www.w3.org/TR/xml/#NT-VersionInfo -/
def VersionInfo : Parsec Unit := do
def VersionInfo : Parser Unit := do
S *>
skipString "version"
Eq
quote VersionNum
/-- https://www.w3.org/TR/xml/#NT-EncName -/
def EncName : Parsec String := do
def EncName : Parser String := do
let x asciiLetter
manyCharsCore (asciiLetter <|> digit <|> pchar '-' <|> pchar '_' <|> pchar '.') x.toString
/-- https://www.w3.org/TR/xml/#NT-EncodingDecl -/
def EncodingDecl : Parsec String := do
def EncodingDecl : Parser String := do
S *>
skipString "encoding"
Eq
quote EncName
/-- https://www.w3.org/TR/xml/#NT-SDDecl -/
def SDDecl : Parsec String := do
def SDDecl : Parser String := do
S *> skipString "standalone" *> Eq *> quote (pstring "yes" <|> pstring "no")
/-- https://www.w3.org/TR/xml/#NT-XMLDecl -/
def XMLdecl : Parsec Unit := do
def XMLdecl : Parser Unit := do
skipString "<?xml"
VersionInfo
optional EncodingDecl *>
@@ -115,7 +116,7 @@ def XMLdecl : Parsec Unit := do
skipString "?>"
/-- https://www.w3.org/TR/xml/#NT-Comment -/
def Comment : Parsec String :=
def Comment : Parser String :=
let notDash := Char.toString <$> satisfy (λ c => c '-')
skipString "<!--" *>
Array.foldl String.append "" <$> many (attempt <| notDash <|> (do
@@ -125,45 +126,45 @@ def Comment : Parsec String :=
<* skipString "-->"
/-- https://www.w3.org/TR/xml/#NT-PITarget -/
def PITarget : Parsec String :=
def PITarget : Parser String :=
Name <* (skipChar 'X' <|> skipChar 'x') <* (skipChar 'M' <|> skipChar 'm') <* (skipChar 'L' <|> skipChar 'l')
/-- https://www.w3.org/TR/xml/#NT-PI -/
def PI : Parsec Unit := do
def PI : Parser Unit := do
skipString "<?"
<* PITarget <*
optional (S *> manyChars (notFollowedBy (skipString "?>") *> Char))
skipString "?>"
/-- https://www.w3.org/TR/xml/#NT-Misc -/
def Misc : Parsec Unit :=
def Misc : Parser Unit :=
Comment *> pure () <|> PI <|> S *> pure ()
/-- https://www.w3.org/TR/xml/#NT-SystemLiteral -/
def SystemLiteral : Parsec String :=
def SystemLiteral : Parser String :=
pchar '"' *> manyChars (satisfy λ c => c ≠ '"') <* pchar '"'
<|> pchar '\'' *> manyChars (satisfy λ c => c ≠ '\'') <* pure '\''
/-- https://www.w3.org/TR/xml/#NT-PubidChar -/
def PubidChar : Parsec LeanChar :=
def PubidChar : Parser LeanChar :=
asciiLetter <|> digit <|> endl <|> attempt do
let c ← anyChar
let c ← any
if "-'()+,./:=?;!*#@$_%".contains c then pure c else fail "PublidChar expected"
/-- https://www.w3.org/TR/xml/#NT-PubidLiteral -/
def PubidLiteral : Parsec String :=
def PubidLiteral : Parser String :=
pchar '"' *> manyChars PubidChar <* pchar '"'
<|> pchar '\'' *> manyChars (attempt do
let c ← PubidChar
if c = '\'' then fail "'\\'' not expected" else pure c) <* pchar '\''
/-- https://www.w3.org/TR/xml/#NT-ExternalID -/
def ExternalID : Parsec Unit :=
def ExternalID : Parser Unit :=
skipString "SYSTEM" *> S *> SystemLiteral *> pure ()
<|> skipString "PUBLIC" *> S *> PubidLiteral *> S *> SystemLiteral *> pure ()
/-- https://www.w3.org/TR/xml/#NT-Mixed -/
def Mixed : Parsec Unit :=
def Mixed : Parser Unit :=
(do
skipChar '('
optional S *>
@@ -175,11 +176,11 @@ def Mixed : Parsec Unit :=
mutual
/-- https://www.w3.org/TR/xml/#NT-cp -/
partial def cp : Parsec Unit :=
partial def cp : Parser Unit :=
(Name *> pure () <|> choice <|> seq) <* optional (skipChar '?' <|> skipChar '*' <|> skipChar '+')
/-- https://www.w3.org/TR/xml/#NT-choice -/
partial def choice : Parsec Unit := do
partial def choice : Parser Unit := do
skipChar '('
optional S *>
cp
@@ -188,7 +189,7 @@ mutual
skipChar ')'
/-- https://www.w3.org/TR/xml/#NT-seq -/
partial def seq : Parsec Unit := do
partial def seq : Parser Unit := do
skipChar '('
optional S *>
cp
@@ -198,15 +199,15 @@ mutual
end
/-- https://www.w3.org/TR/xml/#NT-children -/
def children : Parsec Unit :=
def children : Parser Unit :=
(choice <|> seq) <* optional (skipChar '?' <|> skipChar '*' <|> skipChar '+')
/-- https://www.w3.org/TR/xml/#NT-contentspec -/
def contentspec : Parsec Unit := do
def contentspec : Parser Unit := do
skipString "EMPTY" <|> skipString "ANY" <|> Mixed <|> children
/-- https://www.w3.org/TR/xml/#NT-elementdecl -/
def elementDecl : Parsec Unit := do
def elementDecl : Parser Unit := do
skipString "<!ELEMENT"
S *>
Name *>
@@ -215,11 +216,11 @@ def elementDecl : Parsec Unit := do
skipChar '>'
/-- https://www.w3.org/TR/xml/#NT-StringType -/
def StringType : Parsec Unit :=
def StringType : Parser Unit :=
skipString "CDATA"
/-- https://www.w3.org/TR/xml/#NT-TokenizedType -/
def TokenizedType : Parsec Unit :=
def TokenizedType : Parser Unit :=
skipString "ID"
<|> skipString "IDREF"
<|> skipString "IDREFS"
@@ -229,7 +230,7 @@ def TokenizedType : Parsec Unit :=
<|> skipString "NMTOKENS"
/-- https://www.w3.org/TR/xml/#NT-NotationType -/
def NotationType : Parsec Unit := do
def NotationType : Parser Unit := do
skipString "NOTATION"
S *>
skipChar '(' <*
@@ -239,11 +240,11 @@ def NotationType : Parsec Unit := do
skipChar ')'
/-- https://www.w3.org/TR/xml/#NT-Nmtoken -/
def Nmtoken : Parsec String := do
def Nmtoken : Parser String := do
many1Chars NameChar
/-- https://www.w3.org/TR/xml/#NT-Enumeration -/
def Enumeration : Parsec Unit := do
def Enumeration : Parser Unit := do
skipChar '('
optional S *>
Nmtoken *> many (optional S *> skipChar '|' *> optional S *> Nmtoken) *>
@@ -251,11 +252,11 @@ def Enumeration : Parsec Unit := do
skipChar ')'
/-- https://www.w3.org/TR/xml/#NT-EnumeratedType -/
def EnumeratedType : Parsec Unit :=
def EnumeratedType : Parser Unit :=
NotationType <|> Enumeration
/-- https://www.w3.org/TR/xml/#NT-AttType -/
def AttType : Parsec Unit :=
def AttType : Parser Unit :=
StringType <|> TokenizedType <|> EnumeratedType
def predefinedEntityToChar : String → Option LeanChar
@@ -267,7 +268,7 @@ def predefinedEntityToChar : String → Option LeanChar
| _ => none
/-- https://www.w3.org/TR/xml/#NT-EntityRef -/
def EntityRef : Parsec $ Option LeanChar := attempt $
def EntityRef : Parser $ Option LeanChar := attempt $
skipChar '&' *> predefinedEntityToChar <$> Name <* skipChar ';'
@[inline]
@@ -280,7 +281,7 @@ def digitsToNat (base : Nat) (digits : Array Nat) : Nat :=
digits.foldl (λ r d => r * base + d) 0
/-- https://www.w3.org/TR/xml/#NT-CharRef -/
def CharRef : Parsec LeanChar := do
def CharRef : Parser LeanChar := do
skipString "&#"
let charCode
digitsToNat 10 <$> many1 (hexDigitToNat <$> digit)
@@ -289,11 +290,11 @@ def CharRef : Parsec LeanChar := do
return Char.ofNat charCode
/-- https://www.w3.org/TR/xml/#NT-Reference -/
def Reference : Parsec $ Option LeanChar :=
def Reference : Parser $ Option LeanChar :=
EntityRef <|> some <$> CharRef
/-- https://www.w3.org/TR/xml/#NT-AttValue -/
def AttValue : Parsec String := do
def AttValue : Parser String := do
let chars
(do
skipChar '"'
@@ -306,25 +307,25 @@ def AttValue : Parsec String := do
return chars.foldl (λ s c => if let some c := c then s.push c else s) ""
/-- https://www.w3.org/TR/xml/#NT-DefaultDecl -/
def DefaultDecl : Parsec Unit :=
def DefaultDecl : Parser Unit :=
skipString "#REQUIRED"
<|> skipString "#IMPLIED"
<|> optional (skipString "#FIXED" <* S) *> AttValue *> pure ()
/-- https://www.w3.org/TR/xml/#NT-AttDef -/
def AttDef : Parsec Unit :=
def AttDef : Parser Unit :=
S *> Name *> S *> AttType *> S *> DefaultDecl
/-- https://www.w3.org/TR/xml/#NT-AttlistDecl -/
def AttlistDecl : Parsec Unit :=
def AttlistDecl : Parser Unit :=
skipString "<!ATTLIST" *> S *> Name *> many AttDef *> optional S *> skipChar '>'
/-- https://www.w3.org/TR/xml/#NT-PEReference -/
def PEReference : Parsec Unit :=
def PEReference : Parser Unit :=
skipChar '%' *> Name *> skipChar ';'
/-- https://www.w3.org/TR/xml/#NT-EntityValue -/
def EntityValue : Parsec String := do
def EntityValue : Parser String := do
let chars ←
(do
skipChar '"'
@@ -338,51 +339,51 @@ def EntityValue : Parsec String := do
/-- https://www.w3.org/TR/xml/#NT-NDataDecl -/
def NDataDecl : Parsec Unit :=
def NDataDecl : Parser Unit :=
S *> skipString "NDATA" <* S <* Name
/-- https://www.w3.org/TR/xml/#NT-EntityDef -/
def EntityDef : Parsec Unit :=
def EntityDef : Parser Unit :=
EntityValue *> pure () <|> (ExternalID <* optional NDataDecl)
/-- https://www.w3.org/TR/xml/#NT-GEDecl -/
def GEDecl : Parsec Unit :=
def GEDecl : Parser Unit :=
skipString "<!ENTITY" *> S *> Name *> S *> EntityDef *> optional S *> skipChar '>'
/-- https://www.w3.org/TR/xml/#NT-PEDef -/
def PEDef : Parsec Unit :=
def PEDef : Parser Unit :=
EntityValue *> pure () <|> ExternalID
/-- https://www.w3.org/TR/xml/#NT-PEDecl -/
def PEDecl : Parsec Unit :=
def PEDecl : Parser Unit :=
skipString "<!ENTITY" *> S *> skipChar '%' *> S *> Name *> PEDef *> optional S *> skipChar '>'
/-- https://www.w3.org/TR/xml/#NT-EntityDecl -/
def EntityDecl : Parsec Unit :=
def EntityDecl : Parser Unit :=
GEDecl <|> PEDecl
/-- https://www.w3.org/TR/xml/#NT-PublicID -/
def PublicID : Parsec Unit :=
def PublicID : Parser Unit :=
skipString "PUBLIC" <* S <* PubidLiteral
/-- https://www.w3.org/TR/xml/#NT-NotationDecl -/
def NotationDecl : Parsec Unit :=
def NotationDecl : Parser Unit :=
skipString "<!NOTATION" *> S *> Name *> (ExternalID <|> PublicID) *> optional S *> skipChar '>'
/-- https://www.w3.org/TR/xml/#NT-markupdecl -/
def markupDecl : Parsec Unit :=
def markupDecl : Parser Unit :=
elementDecl <|> AttlistDecl <|> EntityDecl <|> NotationDecl <|> PI <|> (Comment *> pure ())
/-- https://www.w3.org/TR/xml/#NT-DeclSep -/
def DeclSep : Parsec Unit :=
def DeclSep : Parser Unit :=
PEReference <|> S *> pure ()
/-- https://www.w3.org/TR/xml/#NT-intSubset -/
def intSubset : Parsec Unit :=
def intSubset : Parser Unit :=
many (markupDecl <|> DeclSep) *> pure ()
/-- https://www.w3.org/TR/xml/#NT-doctypedecl -/
def doctypedecl : Parsec Unit := do
def doctypedecl : Parser Unit := do
skipString "<!DOCTYPE"
S *>
Name *>
@@ -392,19 +393,19 @@ def doctypedecl : Parsec Unit := do
skipChar '>'
/-- https://www.w3.org/TR/xml/#NT-prolog -/
def prolog : Parsec Unit :=
def prolog : Parser Unit :=
optional XMLdecl *>
many Misc *>
optional (doctypedecl <* many Misc) *> pure ()
/-- https://www.w3.org/TR/xml/#NT-Attribute -/
def Attribute : Parsec (String × String) := do
def Attribute : Parser (String × String) := do
let name Name
Eq
let value AttValue
return (name, value)
protected def elementPrefix : Parsec (Array Content Element) := do
protected def elementPrefix : Parser (Array Content Element) := do
skipChar '<'
let name Name
let attributes many (attempt <| S *> Attribute)
@@ -412,40 +413,40 @@ protected def elementPrefix : Parsec (Array Content → Element) := do
return Element.Element name (RBMap.fromList attributes.toList compare)
/-- https://www.w3.org/TR/xml/#NT-EmptyElemTag -/
def EmptyElemTag (elem : Array Content Element) : Parsec Element := do
def EmptyElemTag (elem : Array Content Element) : Parser Element := do
skipString "/>" *> pure (elem #[])
/-- https://www.w3.org/TR/xml/#NT-STag -/
def STag (elem : Array Content Element) : Parsec (Array Content Element) := do
def STag (elem : Array Content Element) : Parser (Array Content Element) := do
skipChar '>' *> pure elem
/-- https://www.w3.org/TR/xml/#NT-ETag -/
def ETag : Parsec Unit :=
def ETag : Parser Unit :=
skipString "</" *> Name *> optional S *> skipChar '>'
/-- https://www.w3.org/TR/xml/#NT-CDStart -/
def CDStart : Parsec Unit :=
def CDStart : Parser Unit :=
skipString "<![CDATA["
/-- https://www.w3.org/TR/xml/#NT-CDEnd -/
def CDEnd : Parsec Unit :=
def CDEnd : Parser Unit :=
skipString "]]>"
/-- https://www.w3.org/TR/xml/#NT-CData -/
def CData : Parsec String :=
manyChars (notFollowedBy (skipString "]]>") *> anyChar)
def CData : Parser String :=
manyChars (notFollowedBy (skipString "]]>") *> any)
/-- https://www.w3.org/TR/xml/#NT-CDSect -/
def CDSect : Parsec String :=
def CDSect : Parser String :=
CDStart *> CData <* CDEnd
/-- https://www.w3.org/TR/xml/#NT-CharData -/
def CharData : Parsec String :=
def CharData : Parser String :=
notFollowedBy (skipString "]]>") *> manyChars (satisfy λ c => c '<' c '&')
mutual
/-- https://www.w3.org/TR/xml/#NT-content -/
partial def content : Parsec (Array Content) := do
partial def content : Parser (Array Content) := do
let x optional (Content.Character <$> CharData)
let xs many do
let y
@@ -468,20 +469,20 @@ mutual
return res
/-- https://www.w3.org/TR/xml/#NT-element -/
partial def element : Parsec Element := do
partial def element : Parser Element := do
let elem Parser.elementPrefix
EmptyElemTag elem <|> STag elem <*> content <* ETag
end
/-- https://www.w3.org/TR/xml/#NT-document -/
def document : Parsec Element := prolog *> element <* many Misc <* eof
def document : Parser Element := prolog *> element <* many Misc <* eof
end Parser
def parse (s : String) : Except String Element :=
match Xml.Parser.document s.mkIterator with
| Parsec.ParseResult.success _ res => Except.ok res
| Parsec.ParseResult.error it err => Except.error s!"offset {it.i.byteIdx.repr}: {err}\n{(it.prevn 10).extract it}"
| .success _ res => Except.ok res
| .error it err => Except.error s!"offset {it.i.byteIdx.repr}: {err}\n{(it.prevn 10).extract it}"
end Xml

View File

@@ -1,5 +1,5 @@
import Lean.Data.Parsec
open Lean Parsec
open Lean Parsec String
@[macro_inline] -- Error
def f : Nat Nat
@@ -19,7 +19,7 @@ def h : Nat → Nat → Nat
termination_by x y => x
@[macro_inline] -- Error
partial def skipMany (p : Parsec α) (it : String.Iterator) : Parsec PUnit := do
partial def skipMany (p : Parser α) (it : String.Iterator) : Parser PUnit := do
match p it with
| .success it _ => skipMany p it
| .error _ _ => pure ()

View File

@@ -57,12 +57,16 @@ def Lean.Widget.GetWidgetsResponse.debugJson (r : Widget.GetWidgetsResponse) : J
)
]
def word : Parsec String :=
Parsec.many1Chars <| Parsec.digit <|> Parsec.asciiLetter <|> Parsec.pchar '_'
open Parsec in
open Parsec.String in
def word : Parser String :=
many1Chars <| digit <|> asciiLetter <|> pchar '_'
def ident : Parsec Name := do
open Parsec in
open Parsec.String in
def ident : Parser Name := do
let head word
let xs Parsec.many1 (Parsec.pchar '.' *> word)
let xs many1 (pchar '.' *> word)
return xs.foldl .str $ .mkSimple head
partial def main (args : List String) : IO Unit := do