mirror of
https://github.com/leanprover/lean4.git
synced 2026-03-17 18:34:06 +00:00
Compare commits
5 Commits
57df23f27e
...
hbv/genera
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d7fac39841 | ||
|
|
331a5d079f | ||
|
|
da6df60c22 | ||
|
|
6eadcf61bb | ||
|
|
b0d24e01fb |
@@ -187,6 +187,121 @@ def foldlM {β : Type v} {m : Type v → Type w} [Monad m] (f : β → UInt8 →
|
||||
def foldl {β : Type v} (f : β → UInt8 → β) (init : β) (as : ByteArray) (start := 0) (stop := as.size) : β :=
|
||||
Id.run <| as.foldlM f init start stop
|
||||
|
||||
/-- A cursor over the bytes (`UInt8`) of a `ByteArray`.

Normally obtained via `arr.iter`, where `arr` is a `ByteArray`.

The iterator is *valid* when its position `i` is *valid* for the array `arr`, i.e.
`0 ≤ i ≤ arr.size`.

On an invalid iterator most operations return arbitrary values. The functions of the
`ByteArray.Iterator` API are meant to never create invalid iterators, with two exceptions:

- `Iterator.next iter` is invalid when `iter` is already at the end of the array (`iter.atEnd` is
  `true`);
- `Iterator.forward iter n`/`Iterator.nextn iter n` is invalid when `n` is strictly greater than
  the number of remaining bytes.
-/
structure Iterator where
  /-- The array being traversed. -/
  array : ByteArray
  /-- The current position.

  It is not guaranteed to be valid for the array, for example after repeatedly calling
  `Iterator.next` while `Iterator.atEnd` is true. At an invalid position the current byte is
  `(default : UInt8)`. -/
  idx : Nat
  deriving Inhabited
|
||||
|
||||
/-- Builds an iterator positioned at the first byte of `arr`. -/
def mkIterator (arr : ByteArray) : Iterator :=
  { array := arr, idx := 0 }

@[inherit_doc mkIterator]
abbrev iter := mkIterator
|
||||
|
||||
/-- An iterator's size is the number of bytes it has not traversed yet. -/
instance : SizeOf Iterator :=
  ⟨fun it => it.array.size - it.idx⟩

/-- Unfolds `sizeOf` on an iterator to `array.size - idx`. -/
theorem Iterator.sizeOf_eq (i : Iterator) : sizeOf i = i.array.size - i.idx :=
  rfl
|
||||
|
||||
namespace Iterator
|
||||
|
||||
/-- How many bytes lie between the current position and the end of the array. -/
def remainingBytes : Iterator → Nat :=
  fun it => it.array.size - it.idx

@[inherit_doc Iterator.idx]
def pos := Iterator.idx
|
||||
|
||||
/-- Reads the byte under the cursor.

Yields `(default : UInt8)` when the position is invalid. -/
@[inline]
def curr : Iterator → UInt8
  | ⟨bytes, k⟩ =>
    if inBounds : k < bytes.size then bytes[k]'inBounds else default
|
||||
|
||||
/-- Advances the position by exactly one byte, with no bounds check.

This is only valid while the iterator has not reached the end of the array, *i.e.* while
`Iterator.atEnd` is `false`; otherwise the returned iterator is invalid. -/
@[inline]
def next : Iterator → Iterator :=
  fun it => { it with idx := it.idx + 1 }
|
||||
|
||||
/-- Steps the position back by one byte.

At position zero this is the identity (natural-number subtraction truncates at `0`). -/
@[inline]
def prev : Iterator → Iterator :=
  fun it => { it with idx := it.idx - 1 }
|
||||
|
||||
/-- `true` exactly when the position is at or beyond the array's size, i.e. past the last byte. -/
@[inline]
def atEnd : Iterator → Bool :=
  fun it => it.idx ≥ it.array.size
|
||||
|
||||
/-- `true` exactly when the position is strictly below the array's size, i.e. a byte can be read. -/
@[inline]
def hasNext : Iterator → Bool :=
  fun it => it.idx < it.array.size
|
||||
|
||||
/-- `true` exactly when the position is greater than zero. -/
@[inline]
def hasPrev : Iterator → Bool :=
  fun it => it.idx > 0
|
||||
|
||||
/-- Jumps to the end of the array by setting the position to `array.size`.

Consequently `i.toEnd.atEnd` is always `true`. -/
@[inline]
def toEnd : Iterator → Iterator :=
  fun it => { it with idx := it.array.size }
|
||||
|
||||
/-- Advances the position by the given number of bytes, with no bounds check.

The result is a valid iterator only when the number of bytes skipped does not exceed the number
of bytes remaining in the iterator. -/
@[inline]
def forward : Iterator → Nat → Iterator :=
  fun it n => { it with idx := it.idx + n }

@[inherit_doc forward, inline]
def nextn : Iterator → Nat → Iterator := forward
|
||||
|
||||
/-- Steps the position back by the given number of bytes.

When asked to go back further than the current position, it stops at the start of the array
(natural-number subtraction truncates at `0`). -/
@[inline]
def prevn : Iterator → Nat → Iterator :=
  fun it n => { it with idx := it.idx - n }
|
||||
|
||||
end Iterator
|
||||
end ByteArray
|
||||
|
||||
def List.toByteArray (bs : List UInt8) : ByteArray :=
|
||||
|
||||
@@ -12,10 +12,11 @@ import Lean.Data.RBMap
|
||||
namespace Lean.Json.Parser
|
||||
|
||||
open Lean.Parsec
|
||||
open Lean.Parsec.String
|
||||
|
||||
@[inline]
|
||||
def hexChar : Parsec Nat := do
|
||||
let c ← anyChar
|
||||
def hexChar : Parser Nat := do
|
||||
let c ← any
|
||||
if '0' ≤ c ∧ c ≤ '9' then
|
||||
pure $ c.val.toNat - '0'.val.toNat
|
||||
else if 'a' ≤ c ∧ c ≤ 'f' then
|
||||
@@ -25,8 +26,8 @@ def hexChar : Parsec Nat := do
|
||||
else
|
||||
fail "invalid hex character"
|
||||
|
||||
def escapedChar : Parsec Char := do
|
||||
let c ← anyChar
|
||||
def escapedChar : Parser Char := do
|
||||
let c ← any
|
||||
match c with
|
||||
| '\\' => return '\\'
|
||||
| '"' => return '"'
|
||||
@@ -41,13 +42,13 @@ def escapedChar : Parsec Char := do
|
||||
return Char.ofNat $ 4096*u1 + 256*u2 + 16*u3 + u4
|
||||
| _ => fail "illegal \\u escape"
|
||||
|
||||
partial def strCore (acc : String) : Parsec String := do
|
||||
partial def strCore (acc : String) : Parser String := do
|
||||
let c ← peek!
|
||||
if c = '"' then -- "
|
||||
skip
|
||||
return acc
|
||||
else
|
||||
let c ← anyChar
|
||||
let c ← any
|
||||
if c = '\\' then
|
||||
strCore (acc.push (← escapedChar))
|
||||
-- as to whether c.val > 0xffff should be split up and encoded with multiple \u,
|
||||
@@ -58,9 +59,9 @@ partial def strCore (acc : String) : Parsec String := do
|
||||
else
|
||||
fail "unexpected character in string"
|
||||
|
||||
def str : Parsec String := strCore ""
|
||||
def str : Parser String := strCore ""
|
||||
|
||||
partial def natCore (acc digits : Nat) : Parsec (Nat × Nat) := do
|
||||
partial def natCore (acc digits : Nat) : Parser (Nat × Nat) := do
|
||||
let some c ← peek? | return (acc, digits)
|
||||
if '0' ≤ c ∧ c ≤ '9' then
|
||||
skip
|
||||
@@ -70,7 +71,7 @@ partial def natCore (acc digits : Nat) : Parsec (Nat × Nat) := do
|
||||
return (acc, digits)
|
||||
|
||||
@[inline]
|
||||
def lookahead (p : Char → Prop) (desc : String) [DecidablePred p] : Parsec Unit := do
|
||||
def lookahead (p : Char → Prop) (desc : String) [DecidablePred p] : Parser Unit := do
|
||||
let c ← peek!
|
||||
if p c then
|
||||
return ()
|
||||
@@ -78,22 +79,22 @@ def lookahead (p : Char → Prop) (desc : String) [DecidablePred p] : Parsec Uni
|
||||
fail <| "expected " ++ desc
|
||||
|
||||
@[inline]
|
||||
def natNonZero : Parsec Nat := do
|
||||
def natNonZero : Parser Nat := do
|
||||
lookahead (fun c => '1' ≤ c ∧ c ≤ '9') "1-9"
|
||||
let (n, _) ← natCore 0 0
|
||||
return n
|
||||
|
||||
@[inline]
|
||||
def natNumDigits : Parsec (Nat × Nat) := do
|
||||
def natNumDigits : Parser (Nat × Nat) := do
|
||||
lookahead (fun c => '0' ≤ c ∧ c ≤ '9') "digit"
|
||||
natCore 0 0
|
||||
|
||||
@[inline]
|
||||
def natMaybeZero : Parsec Nat := do
|
||||
def natMaybeZero : Parser Nat := do
|
||||
let (n, _) ← natNumDigits
|
||||
return n
|
||||
|
||||
def num : Parsec JsonNumber := do
|
||||
def num : Parser JsonNumber := do
|
||||
let c ← peek!
|
||||
let sign ← if c = '-' then
|
||||
skip
|
||||
@@ -132,10 +133,10 @@ def num : Parsec JsonNumber := do
|
||||
else
|
||||
return res
|
||||
|
||||
partial def arrayCore (anyCore : Parsec Json) (acc : Array Json) : Parsec (Array Json) := do
|
||||
partial def arrayCore (anyCore : Parser Json) (acc : Array Json) : Parser (Array Json) := do
|
||||
let hd ← anyCore
|
||||
let acc' := acc.push hd
|
||||
let c ← anyChar
|
||||
let c ← any
|
||||
if c = ']' then
|
||||
ws
|
||||
return acc'
|
||||
@@ -145,12 +146,12 @@ partial def arrayCore (anyCore : Parsec Json) (acc : Array Json) : Parsec (Array
|
||||
else
|
||||
fail "unexpected character in array"
|
||||
|
||||
partial def objectCore (anyCore : Parsec Json) : Parsec (RBNode String (fun _ => Json)) := do
|
||||
partial def objectCore (anyCore : Parser Json) : Parser (RBNode String (fun _ => Json)) := do
|
||||
lookahead (fun c => c = '"') "\""; skip; -- "
|
||||
let k ← strCore ""; ws
|
||||
lookahead (fun c => c = ':') ":"; skip; ws
|
||||
let v ← anyCore
|
||||
let c ← anyChar
|
||||
let c ← any
|
||||
if c = '}' then
|
||||
ws
|
||||
return RBNode.singleton k v
|
||||
@@ -161,7 +162,7 @@ partial def objectCore (anyCore : Parsec Json) : Parsec (RBNode String (fun _ =>
|
||||
else
|
||||
fail "unexpected character in object"
|
||||
|
||||
partial def anyCore : Parsec Json := do
|
||||
partial def anyCore : Parser Json := do
|
||||
let c ← peek!
|
||||
if c = '[' then
|
||||
skip; ws
|
||||
@@ -203,7 +204,7 @@ partial def anyCore : Parsec Json := do
|
||||
fail "unexpected input"
|
||||
|
||||
|
||||
def any : Parsec Json := do
|
||||
def any : Parser Json := do
|
||||
ws
|
||||
let res ← anyCore
|
||||
eof
|
||||
|
||||
@@ -4,181 +4,6 @@ Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Author: Dany Fabian
|
||||
-/
|
||||
prelude
|
||||
import Init.NotationExtra
|
||||
import Init.Data.ToString.Macro
|
||||
|
||||
namespace Lean
|
||||
|
||||
namespace Parsec
|
||||
inductive ParseResult (α : Type) where
|
||||
| success (pos : String.Iterator) (res : α)
|
||||
| error (pos : String.Iterator) (err : String)
|
||||
deriving Repr
|
||||
end Parsec
|
||||
|
||||
def Parsec (α : Type) : Type := String.Iterator → Lean.Parsec.ParseResult α
|
||||
|
||||
namespace Parsec
|
||||
|
||||
open ParseResult
|
||||
|
||||
instance (α : Type) : Inhabited (Parsec α) :=
|
||||
⟨λ it => error it ""⟩
|
||||
|
||||
@[inline]
|
||||
protected def pure (a : α) : Parsec α := λ it =>
|
||||
success it a
|
||||
|
||||
@[inline]
|
||||
def bind {α β : Type} (f : Parsec α) (g : α → Parsec β) : Parsec β := λ it =>
|
||||
match f it with
|
||||
| success rem a => g a rem
|
||||
| error pos msg => error pos msg
|
||||
|
||||
instance : Monad Parsec :=
|
||||
{ pure := Parsec.pure, bind }
|
||||
|
||||
@[inline]
|
||||
def fail (msg : String) : Parsec α := fun it =>
|
||||
error it msg
|
||||
|
||||
@[inline]
|
||||
def tryCatch (p : Parsec α)
|
||||
(csuccess : α → Parsec β)
|
||||
(cerror : Unit → Parsec β)
|
||||
: Parsec β := fun it =>
|
||||
match p it with
|
||||
| .success rem a => csuccess a rem
|
||||
| .error rem err =>
|
||||
-- We assume that it.s never changes as the `Parsec` monad only modifies `it.pos`.
|
||||
if it.pos = rem.pos then cerror () rem else .error rem err
|
||||
|
||||
@[inline]
|
||||
def orElse (p : Parsec α) (q : Unit → Parsec α) : Parsec α :=
|
||||
tryCatch p pure q
|
||||
|
||||
@[inline]
|
||||
def attempt (p : Parsec α) : Parsec α := λ it =>
|
||||
match p it with
|
||||
| success rem res => success rem res
|
||||
| error _ err => error it err
|
||||
|
||||
instance : Alternative Parsec :=
|
||||
{ failure := fail "", orElse }
|
||||
|
||||
protected def run (p : Parsec α) (s : String) : Except String α :=
|
||||
match p s.mkIterator with
|
||||
| Parsec.ParseResult.success _ res => Except.ok res
|
||||
| Parsec.ParseResult.error it err => Except.error s!"offset {repr it.i.byteIdx}: {err}"
|
||||
|
||||
def expectedEndOfInput := "expected end of input"
|
||||
|
||||
@[inline]
|
||||
def eof : Parsec Unit := fun it =>
|
||||
if it.hasNext then
|
||||
error it expectedEndOfInput
|
||||
else
|
||||
success it ()
|
||||
|
||||
@[specialize]
|
||||
partial def manyCore (p : Parsec α) (acc : Array α) : Parsec $ Array α :=
|
||||
tryCatch p (manyCore p <| acc.push ·) (fun _ => pure acc)
|
||||
|
||||
@[inline]
|
||||
def many (p : Parsec α) : Parsec $ Array α := manyCore p #[]
|
||||
|
||||
@[inline]
|
||||
def many1 (p : Parsec α) : Parsec $ Array α := do manyCore p #[←p]
|
||||
|
||||
@[specialize]
|
||||
partial def manyCharsCore (p : Parsec Char) (acc : String) : Parsec String :=
|
||||
tryCatch p (manyCharsCore p <| acc.push ·) (fun _ => pure acc)
|
||||
|
||||
@[inline]
|
||||
def manyChars (p : Parsec Char) : Parsec String := manyCharsCore p ""
|
||||
|
||||
@[inline]
|
||||
def many1Chars (p : Parsec Char) : Parsec String := do manyCharsCore p (←p).toString
|
||||
|
||||
/-- Parses the given string. -/
|
||||
def pstring (s : String) : Parsec String := λ it =>
|
||||
let substr := it.extract (it.forward s.length)
|
||||
if substr = s then
|
||||
success (it.forward s.length) substr
|
||||
else
|
||||
error it s!"expected: {s}"
|
||||
|
||||
@[inline]
|
||||
def skipString (s : String) : Parsec Unit := pstring s *> pure ()
|
||||
|
||||
def unexpectedEndOfInput := "unexpected end of input"
|
||||
|
||||
@[inline]
|
||||
def anyChar : Parsec Char := λ it =>
|
||||
if it.hasNext then success it.next it.curr else error it unexpectedEndOfInput
|
||||
|
||||
@[inline]
|
||||
def pchar (c : Char) : Parsec Char := attempt do
|
||||
if (←anyChar) = c then pure c else fail s!"expected: '{c}'"
|
||||
|
||||
@[inline]
|
||||
def skipChar (c : Char) : Parsec Unit := pchar c *> pure ()
|
||||
|
||||
@[inline]
|
||||
def digit : Parsec Char := attempt do
|
||||
let c ← anyChar
|
||||
if '0' ≤ c ∧ c ≤ '9' then return c else fail s!"digit expected"
|
||||
|
||||
@[inline]
|
||||
def hexDigit : Parsec Char := attempt do
|
||||
let c ← anyChar
|
||||
if ('0' ≤ c ∧ c ≤ '9')
|
||||
∨ ('a' ≤ c ∧ c ≤ 'f')
|
||||
∨ ('A' ≤ c ∧ c ≤ 'F') then return c else fail s!"hex digit expected"
|
||||
|
||||
@[inline]
|
||||
def asciiLetter : Parsec Char := attempt do
|
||||
let c ← anyChar
|
||||
if ('A' ≤ c ∧ c ≤ 'Z') ∨ ('a' ≤ c ∧ c ≤ 'z') then return c else fail s!"ASCII letter expected"
|
||||
|
||||
@[inline]
|
||||
def satisfy (p : Char → Bool) : Parsec Char := attempt do
|
||||
let c ← anyChar
|
||||
if p c then return c else fail "condition not satisfied"
|
||||
|
||||
@[inline]
|
||||
def notFollowedBy (p : Parsec α) : Parsec Unit := λ it =>
|
||||
match p it with
|
||||
| success _ _ => error it ""
|
||||
| error _ _ => success it ()
|
||||
|
||||
partial def skipWs (it : String.Iterator) : String.Iterator :=
|
||||
if it.hasNext then
|
||||
let c := it.curr
|
||||
if c = '\u0009' ∨ c = '\u000a' ∨ c = '\u000d' ∨ c = '\u0020' then
|
||||
skipWs it.next
|
||||
else
|
||||
it
|
||||
else
|
||||
it
|
||||
|
||||
@[inline]
|
||||
def peek? : Parsec (Option Char) := fun it =>
|
||||
if it.hasNext then
|
||||
success it it.curr
|
||||
else
|
||||
success it none
|
||||
|
||||
@[inline]
|
||||
def peek! : Parsec Char := do
|
||||
let some c ← peek? | fail unexpectedEndOfInput
|
||||
return c
|
||||
|
||||
@[inline]
|
||||
def skip : Parsec Unit := fun it =>
|
||||
success it.next ()
|
||||
|
||||
@[inline]
|
||||
def ws : Parsec Unit := fun it =>
|
||||
success (skipWs it) ()
|
||||
end Parsec
|
||||
import Lean.Data.Parsec.Basic
|
||||
import Lean.Data.Parsec.String
|
||||
import Lean.Data.Parsec.ByteArray
|
||||
|
||||
144
src/Lean/Data/Parsec/Basic.lean
Normal file
144
src/Lean/Data/Parsec/Basic.lean
Normal file
@@ -0,0 +1,144 @@
|
||||
/-
|
||||
Copyright (c) 2021 Microsoft Corporation. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Author: Dany Fabian, Henrik Böving
|
||||
-/
|
||||
prelude
|
||||
import Init.NotationExtra
|
||||
import Init.Data.ToString.Macro
|
||||
|
||||
namespace Lean
|
||||
|
||||
namespace Parsec
|
||||
|
||||
/--
Outcome of running a parser over an input of type `ι`.

Both constructors carry an input position `pos`; `success` additionally carries the parsed value
and `error` carries an error message.
-/
inductive ParseResult (α : Type) (ι : Type) where
  /-- The parser produced `res`; `pos` is the input left to parse. -/
  | success (pos : ι) (res : α)
  /-- The parser failed with message `err`, reported at input position `pos`. -/
  | error (pos : ι) (err : String)
  deriving Repr
|
||||
|
||||
end Parsec
|
||||
|
||||
/-- A parser over inputs of type `ι` producing an `α`: a function from an input position to a
`ParseResult`. -/
def Parsec (ι α : Type) : Type :=
  ι → Lean.Parsec.ParseResult α ι
|
||||
|
||||
namespace Parsec
|
||||
|
||||
/--
Interface a cursor type `ι` must provide to be parsed with `Parsec`. `elem` is the type of the
items read from the input and `idx` the type of positions; both are determined by `ι` and must
have decidable equality.
-/
class Input (ι : Type) (elem : outParam Type) (idx : outParam Type)
    [DecidableEq idx] [DecidableEq elem] where
  /-- The position of the cursor. -/
  pos : ι → idx
  /-- The cursor moved one element forward. -/
  next : ι → ι
  /-- The element under the cursor. -/
  curr : ι → elem
  /-- Whether there is an element to read at the cursor. -/
  hasNext : ι → Bool
|
||||
|
||||
variable {α : Type} {ι : Type} {elem : Type} {idx : Type}
|
||||
variable [DecidableEq idx] [DecidableEq elem] [Input ι elem idx]
|
||||
|
||||
/-- The default parser fails immediately with an empty error message. -/
instance : Inhabited (Parsec ι α) :=
  ⟨fun input => .error input ""⟩
|
||||
|
||||
/-- The parser that succeeds with `a` and leaves the input untouched. -/
@[inline]
protected def pure (a : α) : Parsec ι α :=
  fun input => ParseResult.success input a
|
||||
|
||||
/-- Sequencing: runs `f`, then feeds its result to `g` on the remaining input. An error from `f`
is returned as is and `g` is not run. -/
@[inline]
def bind {α β : Type} (f : Parsec ι α) (g : α → Parsec ι β) : Parsec ι β :=
  fun input =>
    match f input with
    | .error errPos msg => .error errPos msg
    | .success rest val => g val rest
|
||||
|
||||
/-- `Parsec ι` is a monad via `Parsec.pure` and `Parsec.bind`. -/
instance : Monad (Parsec ι) :=
  { pure := Parsec.pure, bind := Parsec.bind }
|
||||
|
||||
/-- The parser that always fails with `msg`, reporting the current input position. -/
@[inline]
def fail (msg : String) : Parsec ι α :=
  fun input => ParseResult.error input msg
|
||||
|
||||
/--
Runs `p`. On success its result is handed to `csuccess`. On failure, `cerror ()` is run from the
failure position, but only when `p` failed without moving the input position; a failure at a
different position is propagated unchanged.
-/
@[inline]
def tryCatch (p : Parsec ι α) (csuccess : α → Parsec ι β) (cerror : Unit → Parsec ι β)
    : Parsec ι β := fun start =>
  match p start with
  | .success rest val => csuccess val rest
  | .error stop msg =>
    -- Only positions are compared: the underlying input is assumed to be the same in both
    -- cursors, since parsers are expected to change nothing but the position.
    if Input.pos start = Input.pos stop then cerror () stop else .error stop msg
|
||||
|
||||
/-- Runs `p`; if it fails without moving the input position, runs `q ()` instead. -/
@[inline]
def orElse (p : Parsec ι α) (q : Unit → Parsec ι α) : Parsec ι α :=
  tryCatch p (csuccess := pure) (cerror := q)
|
||||
|
||||
/-- Runs `p`, but reports any failure at the position where `attempt p` started, so the failure
looks as if no input had been consumed. -/
@[inline]
def attempt (p : Parsec ι α) : Parsec ι α := fun start =>
  match p start with
  | .error _ msg => .error start msg
  | .success rest val => .success rest val
|
||||
|
||||
/-- `failure` fails with an empty message; `<|>` is `Parsec.orElse`. -/
instance : Alternative (Parsec ι) :=
  { failure := fail "", orElse := Parsec.orElse }
|
||||
|
||||
/-- Error message produced by `eof` when input remains. -/
def expectedEndOfInput : String := "expected end of input"

/-- Succeeds only when no element is left to read; otherwise fails with `expectedEndOfInput`.
Never moves the input position. -/
@[inline]
def eof : Parsec ι Unit := fun input =>
  match Input.hasNext input with
  | true => .error input expectedEndOfInput
  | false => .success input ()
|
||||
|
||||
/-- Worker for `many`: keeps running `p`, appending each result to `acc`, and returns the
accumulated array once `p` fails without moving the input position. -/
@[specialize]
partial def manyCore (p : Parsec ι α) (acc : Array α) : Parsec ι <| Array α :=
  tryCatch p (fun a => manyCore p (acc.push a)) (fun _ => pure acc)
|
||||
|
||||
/-- Runs `p` zero or more times and collects the results. -/
@[inline]
def many (p : Parsec ι α) : Parsec ι <| Array α :=
  manyCore p (acc := #[])

/-- Runs `p` one or more times and collects the results; fails if the first run of `p` fails. -/
@[inline]
def many1 (p : Parsec ι α) : Parsec ι <| Array α := do
  let first ← p
  manyCore p #[first]
|
||||
|
||||
/-- Error message produced when an element is required but the input is exhausted. -/
def unexpectedEndOfInput : String := "unexpected end of input"

/-- Reads one element and advances past it; fails with `unexpectedEndOfInput` when nothing is
left to read. -/
@[inline]
def any : Parsec ι elem := fun input =>
  if !Input.hasNext input then
    .error input unexpectedEndOfInput
  else
    .success (Input.next input) (Input.curr input)
|
||||
|
||||
/-- Reads one element and returns it if it satisfies `p`. Otherwise fails; thanks to `attempt`
the failure is reported at the starting position. -/
@[inline]
def satisfy (p : elem → Bool) : Parsec ι elem :=
  attempt do
    let e ← any
    match p e with
    | true => pure e
    | false => fail "condition not satisfied"
|
||||
|
||||
/-- Negative lookahead: succeeds exactly when `p` fails at the current position, and fails with
an empty message when `p` succeeds. Never moves the input position. -/
@[inline]
def notFollowedBy (p : Parsec ι α) : Parsec ι Unit := fun input =>
  match p input with
  | .error _ _ => .success input ()
  | .success _ _ => .error input ""
|
||||
|
||||
/-- Looks at the next element without consuming it; yields `none` when nothing is left to read.
Always succeeds. -/
@[inline]
def peek? : Parsec ι (Option elem) := fun input =>
  .success input (if Input.hasNext input then some (Input.curr input) else none)
|
||||
|
||||
/-- Looks at the next element without consuming it; fails with `unexpectedEndOfInput` when
nothing is left to read. -/
@[inline]
def peek! : Parsec ι elem := do
  match ← peek? with
  | some e => return e
  | none => fail unexpectedEndOfInput
|
||||
|
||||
/-- Advances the input by one element. This always succeeds and does not check `Input.hasNext`
first. -/
@[inline]
def skip : Parsec ι Unit :=
  fun input => ParseResult.success (Input.next input) ()
|
||||
|
||||
/-- Worker for `manyChars`: keeps running `p`, pushing each character onto `acc`, and returns the
accumulated string once `p` fails without moving the input position. -/
@[specialize]
partial def manyCharsCore (p : Parsec ι Char) (acc : String) : Parsec ι String :=
  tryCatch p (fun c => manyCharsCore p (acc.push c)) (fun _ => pure acc)
|
||||
|
||||
/-- Runs `p` zero or more times and concatenates the characters into a string. -/
@[inline]
def manyChars (p : Parsec ι Char) : Parsec ι String :=
  manyCharsCore p (acc := "")

/-- Runs `p` one or more times and concatenates the characters into a string; fails if the first
run of `p` fails. -/
@[inline]
def many1Chars (p : Parsec ι Char) : Parsec ι String := do
  let first ← p
  manyCharsCore p first.toString
|
||||
|
||||
|
||||
end Parsec
|
||||
103
src/Lean/Data/Parsec/ByteArray.lean
Normal file
103
src/Lean/Data/Parsec/ByteArray.lean
Normal file
@@ -0,0 +1,103 @@
|
||||
/-
|
||||
Copyright (c) 2024 Lean FRO, LLC. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Authors: Henrik Böving
|
||||
-/
|
||||
prelude
|
||||
import Lean.Data.Parsec.Basic
|
||||
import Init.Data.ByteArray.Basic
|
||||
import Init.Data.String.Extra
|
||||
|
||||
namespace Lean
|
||||
namespace Parsec
|
||||
namespace ByteArray
|
||||
|
||||
/-- `ByteArray.Iterator` as parser input: elements are `UInt8` bytes and positions are `Nat`
indices. Each operation forwards to the iterator function of the same name. -/
instance : Input ByteArray.Iterator UInt8 Nat :=
  { pos := fun it => it.pos
    next := fun it => it.next
    curr := fun it => it.curr
    hasNext := fun it => it.hasNext }
|
||||
|
||||
/-- A parser whose input is a `ByteArray.Iterator`. -/
abbrev Parser (α : Type) : Type := Parsec ByteArray.Iterator α

/-- Runs `p` on `arr`, starting at its first byte. Returns the parsed value, or an error message
prefixed with the position at which the failure was reported. Input left over after a successful
parse is ignored. -/
protected def Parser.run (p : Parser α) (arr : ByteArray) : Except String α :=
  match p arr.iter with
  | .error it err => .error s!"offset {repr it.pos}: {err}"
  | .success _ res => .ok res
|
||||
|
||||
/-- Parses exactly the byte `b` and returns it; on any other byte (or at end of input) it fails
without consuming input. -/
@[inline]
def pbyte (b : UInt8) : Parser UInt8 := attempt do
  let got ← any
  if got = b then pure b else fail s!"expected: '{b}'"
|
||||
|
||||
/-- Like `pbyte b`, but discards the parsed byte. -/
@[inline]
def skipByte (b : UInt8) : Parser Unit := do
  let _ ← pbyte b
  pure ()
|
||||
|
||||
/-- Expects the input to continue with exactly the bytes of `arr`, in order, and skips them.
Fails at the first byte that does not match. -/
def skipBytes (arr : ByteArray) : Parser Unit :=
  arr.foldlM (fun (_ : Unit) b => skipByte b) ()
|
||||
|
||||
/-- Expects the input to continue with the UTF-8 encoding of `s`, and returns `s`. -/
@[inline]
def pstring (s : String) : Parser String :=
  skipBytes s.toUTF8 *> pure s

/-- Like `pstring s`, but discards the result. -/
@[inline]
def skipString (s : String) : Parser Unit := do
  let _ ← pstring s
  pure ()
|
||||
|
||||
/--
Parses a `Char` that fits into a single byte and returns it. A `c` that needs more than 1 byte is
truncated (via `Char.toUInt8`) before the comparison.
-/
@[inline]
def pByteChar (c : Char) : Parser Char := attempt do
  let got ← any
  if got = c.toUInt8 then pure c else fail s!"expected: '{c}'"

/--
Skips a `Char` that fits into a single byte. A `c` that needs more than 1 byte is truncated (via
`Char.toUInt8`) before the comparison.
-/
@[inline]
def skipByteChar (c : Char) : Parser Unit :=
  skipByte (Char.toUInt8 c)
|
||||
|
||||
/-- Parses one byte in the range `'0'`–`'9'` and returns it as a `Char`; otherwise fails without
consuming input. -/
@[inline]
def digit : Parser Char := attempt do
  let b ← any
  if '0'.toUInt8 ≤ b && b ≤ '9'.toUInt8 then
    pure (Char.ofUInt8 b)
  else
    fail "digit expected"
|
||||
|
||||
/-- Parses one byte that is an ASCII hexadecimal digit (`0`–`9`, `a`–`f`, `A`–`F`) and returns it
as a `Char`; otherwise fails without consuming input. -/
@[inline]
def hexDigit : Parser Char := attempt do
  let b ← any
  -- `true` when `b` lies between the byte values of `lo` and `hi`, inclusive.
  let inRange (lo hi : Char) : Bool := lo.toUInt8 ≤ b && b ≤ hi.toUInt8
  if inRange '0' '9' || inRange 'a' 'f' || inRange 'A' 'F' then
    pure (Char.ofUInt8 b)
  else
    fail "hex digit expected"
|
||||
|
||||
/-- Parses one byte that is an ASCII letter (`A`–`Z` or `a`–`z`) and returns it as a `Char`;
otherwise fails without consuming input. -/
@[inline]
def asciiLetter : Parser Char := attempt do
  let b ← any
  -- `true` when `b` lies between the byte values of `lo` and `hi`, inclusive.
  let inRange (lo hi : Char) : Bool := lo.toUInt8 ≤ b && b ≤ hi.toUInt8
  if inRange 'A' 'Z' || inRange 'a' 'z' then
    pure (Char.ofUInt8 b)
  else
    fail "ASCII letter expected"
|
||||
|
||||
/-- Advances `it` past any run of tab, line feed, carriage return and space bytes. -/
private partial def skipWs (it : ByteArray.Iterator) : ByteArray.Iterator :=
  let isWs (b : UInt8) : Bool :=
    b == '\u0009'.toUInt8 || b == '\u000a'.toUInt8 || b == '\u000d'.toUInt8 || b == '\u0020'.toUInt8
  if it.hasNext && isWs it.curr then skipWs it.next else it
|
||||
|
||||
/-- Skips zero or more whitespace bytes (tab, line feed, carriage return, space). Always
succeeds. -/
@[inline]
def ws : Parser Unit :=
  fun input => ParseResult.success (skipWs input) ()
|
||||
|
||||
/-- Reads the next `n` bytes as a `ByteArray`. Fails without consuming input when the extracted
slice does not contain exactly `n` bytes. -/
def take (n : Nat) : Parser ByteArray := fun it =>
  let chunk := it.array.extract it.idx (it.idx + n)
  if chunk.size == n then
    .success (it.forward n) chunk
  else
    .error it s!"expected: {n} bytes"
|
||||
|
||||
end ByteArray
|
||||
end Parsec
|
||||
end Lean
|
||||
84
src/Lean/Data/Parsec/String.lean
Normal file
84
src/Lean/Data/Parsec/String.lean
Normal file
@@ -0,0 +1,84 @@
|
||||
/-
|
||||
Copyright (c) 2021 Microsoft Corporation. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Author: Dany Fabian, Henrik Böving
|
||||
-/
|
||||
prelude
|
||||
import Lean.Data.Parsec.Basic
|
||||
|
||||
namespace Lean
|
||||
namespace Parsec
|
||||
namespace String
|
||||
|
||||
/-- `String.Iterator` as parser input: elements are `Char`s and positions are `String.Pos`
values. Each operation forwards to the iterator function of the same name. -/
instance : Input String.Iterator Char String.Pos :=
  { pos := fun it => it.pos
    next := fun it => it.next
    curr := fun it => it.curr
    hasNext := fun it => it.hasNext }
|
||||
|
||||
/-- A parser whose input is a `String.Iterator`. -/
abbrev Parser (α : Type) : Type := Parsec String.Iterator α

/-- Runs `p` on `s`, starting at its first character. Returns the parsed value, or an error
message prefixed with the byte offset at which the failure was reported. Input left over after a
successful parse is ignored. -/
protected def Parser.run (p : Parser α) (s : String) : Except String α :=
  match p s.mkIterator with
  | .error it err => .error s!"offset {repr it.i.byteIdx}: {err}"
  | .success _ res => .ok res
|
||||
|
||||
/-- Parses exactly the string `s` and returns the matched text; fails without consuming input when
the next `s.length` characters differ from `s`. -/
def pstring (s : String) : Parser String := fun it =>
  let stop := it.forward s.length
  let found := it.extract stop
  if found = s then
    .success stop found
  else
    .error it s!"expected: {s}"

/-- Like `pstring s`, but discards the result. -/
@[inline]
def skipString (s : String) : Parser Unit := do
  let _ ← pstring s
  pure ()
|
||||
|
||||
/-- Parses exactly the character `c` and returns it; on any other character (or at end of input)
it fails without consuming input. -/
@[inline]
def pchar (c : Char) : Parser Char := attempt do
  let got ← any
  if got = c then pure c else fail s!"expected: '{c}'"

/-- Like `pchar c`, but discards the parsed character. -/
@[inline]
def skipChar (c : Char) : Parser Unit := do
  let _ ← pchar c
  pure ()
|
||||
|
||||
/-- Parses one character in the range `'0'`–`'9'`; otherwise fails without consuming input. -/
@[inline]
def digit : Parser Char := attempt do
  let c ← any
  if '0' ≤ c && c ≤ '9' then
    pure c
  else
    fail "digit expected"
|
||||
|
||||
/-- Parses one ASCII hexadecimal digit (`0`–`9`, `a`–`f`, `A`–`F`); otherwise fails without
consuming input. -/
@[inline]
def hexDigit : Parser Char := attempt do
  let c ← any
  -- `true` when `c` lies between `lo` and `hi`, inclusive.
  let inRange (lo hi : Char) : Bool := lo ≤ c && c ≤ hi
  if inRange '0' '9' || inRange 'a' 'f' || inRange 'A' 'F' then
    pure c
  else
    fail "hex digit expected"
|
||||
|
||||
/-- Parses one ASCII letter (`A`–`Z` or `a`–`z`); otherwise fails without consuming input. -/
@[inline]
def asciiLetter : Parser Char := attempt do
  let c ← any
  -- `true` when `c` lies between `lo` and `hi`, inclusive.
  let inRange (lo hi : Char) : Bool := lo ≤ c && c ≤ hi
  if inRange 'A' 'Z' || inRange 'a' 'z' then
    pure c
  else
    fail "ASCII letter expected"
|
||||
|
||||
/-- Advances `it` past any run of tab, line feed, carriage return and space characters. -/
private partial def skipWs (it : String.Iterator) : String.Iterator :=
  let isWs (c : Char) : Bool :=
    c == '\u0009' || c == '\u000a' || c == '\u000d' || c == '\u0020'
  if it.hasNext && isWs it.curr then skipWs it.next else it
|
||||
|
||||
/-- Skips zero or more whitespace characters (tab, line feed, carriage return, space). Always
succeeds. -/
@[inline]
def ws : Parser Unit :=
  fun input => ParseResult.success (skipWs input) ()
|
||||
|
||||
/-- Reads the next `n` characters as a `String`. Fails without consuming input when the extracted
text does not contain exactly `n` characters. -/
def take (n : Nat) : Parser String := fun it =>
  let stop := it.forward n
  let found := it.extract stop
  if found.length == n then
    .success stop found
  else
    .error it s!"expected: {n} codepoints"
|
||||
|
||||
end String
|
||||
end Parsec
|
||||
end Lean
|
||||
@@ -13,23 +13,24 @@ namespace Lean
|
||||
namespace Xml
|
||||
|
||||
namespace Parser
|
||||
|
||||
open Lean.Parsec
|
||||
open Parsec.ParseResult
|
||||
open Lean.Parsec.String
|
||||
|
||||
abbrev LeanChar := Char
|
||||
|
||||
/-- Consumes a newline character sequence, pretending that we read '\n'. As per spec:
|
||||
https://www.w3.org/TR/xml/#sec-line-ends -/
|
||||
def endl : Parsec LeanChar := (skipString "\r\n" <|> skipChar '\r' <|> skipChar '\n') *> pure '\n'
|
||||
def endl : Parser LeanChar := (skipString "\r\n" <|> skipChar '\r' <|> skipChar '\n') *> pure '\n'
|
||||
|
||||
def quote (p : Parsec α) : Parsec α :=
|
||||
def quote (p : Parser α) : Parser α :=
|
||||
skipChar '\'' *> p <* skipChar '\''
|
||||
<|> skipChar '"' *> p <* skipChar '"'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Char -/
|
||||
def Char : Parsec LeanChar :=
|
||||
def Char : Parser LeanChar :=
|
||||
(attempt do
|
||||
let c ← anyChar
|
||||
let c ← any
|
||||
let cNat := c.toNat
|
||||
if (0x20 ≤ cNat ∧ cNat ≤ 0xD7FF)
|
||||
∨ (0xE000 ≤ cNat ∧ cNat ≤ 0xFFFD)
|
||||
@@ -37,11 +38,11 @@ def Char : Parsec LeanChar :=
|
||||
<|> pchar '\t' <|> endl
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-S -/
|
||||
def S : Parsec String :=
|
||||
def S : Parser String :=
|
||||
many1Chars (pchar ' ' <|> endl <|> pchar '\t')
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Eq -/
|
||||
def Eq : Parsec Unit :=
|
||||
def Eq : Parser Unit :=
|
||||
optional S *> skipChar '=' <* optional S
|
||||
|
||||
private def nameStartCharRanges : Array (Nat × Nat) :=
|
||||
@@ -59,8 +60,8 @@ private def nameStartCharRanges : Array (Nat × Nat) :=
|
||||
(0x10000, 0xEFFFF)]
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-NameStartChar -/
|
||||
def NameStartChar : Parsec LeanChar := attempt do
|
||||
let c ← anyChar
|
||||
def NameStartChar : Parser LeanChar := attempt do
|
||||
let c ← any
|
||||
if ('A' ≤ c ∧ c ≤ 'Z') ∨ ('a' ≤ c ∧ c ≤ 'z') then pure c
|
||||
else if c = ':' ∨ c = '_' then pure c
|
||||
else
|
||||
@@ -69,44 +70,44 @@ def NameStartChar : Parsec LeanChar := attempt do
|
||||
else fail "expected a name character"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-NameChar -/
|
||||
def NameChar : Parsec LeanChar :=
|
||||
def NameChar : Parser LeanChar :=
|
||||
NameStartChar <|> digit <|> pchar '-' <|> pchar '.' <|> pchar '\xB7'
|
||||
<|> satisfy (λ c => ('\u0300' ≤ c ∧ c ≤ '\u036F') ∨ ('\u203F' ≤ c ∧ c ≤ '\u2040'))
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Name -/
|
||||
def Name : Parsec String := do
|
||||
def Name : Parser String := do
|
||||
let x ← NameStartChar
|
||||
manyCharsCore NameChar x.toString
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-VersionNum -/
|
||||
def VersionNum : Parsec Unit :=
|
||||
def VersionNum : Parser Unit :=
|
||||
skipString "1." <* (many1 digit)
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-VersionInfo -/
|
||||
def VersionInfo : Parsec Unit := do
|
||||
def VersionInfo : Parser Unit := do
|
||||
S *>
|
||||
skipString "version"
|
||||
Eq
|
||||
quote VersionNum
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EncName -/
|
||||
def EncName : Parsec String := do
|
||||
def EncName : Parser String := do
|
||||
let x ← asciiLetter
|
||||
manyCharsCore (asciiLetter <|> digit <|> pchar '-' <|> pchar '_' <|> pchar '.') x.toString
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EncodingDecl -/
|
||||
def EncodingDecl : Parsec String := do
|
||||
def EncodingDecl : Parser String := do
|
||||
S *>
|
||||
skipString "encoding"
|
||||
Eq
|
||||
quote EncName
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-SDDecl -/
|
||||
def SDDecl : Parsec String := do
|
||||
def SDDecl : Parser String := do
|
||||
S *> skipString "standalone" *> Eq *> quote (pstring "yes" <|> pstring "no")
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-XMLDecl -/
|
||||
def XMLdecl : Parsec Unit := do
|
||||
def XMLdecl : Parser Unit := do
|
||||
skipString "<?xml"
|
||||
VersionInfo
|
||||
optional EncodingDecl *>
|
||||
@@ -115,7 +116,7 @@ def XMLdecl : Parsec Unit := do
|
||||
skipString "?>"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Comment -/
|
||||
def Comment : Parsec String :=
|
||||
def Comment : Parser String :=
|
||||
let notDash := Char.toString <$> satisfy (λ c => c ≠ '-')
|
||||
skipString "<!--" *>
|
||||
Array.foldl String.append "" <$> many (attempt <| notDash <|> (do
|
||||
@@ -125,45 +126,45 @@ def Comment : Parsec String :=
|
||||
<* skipString "-->"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PITarget -/
|
||||
def PITarget : Parsec String :=
|
||||
def PITarget : Parser String :=
|
||||
Name <* (skipChar 'X' <|> skipChar 'x') <* (skipChar 'M' <|> skipChar 'm') <* (skipChar 'L' <|> skipChar 'l')
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PI -/
|
||||
def PI : Parsec Unit := do
|
||||
def PI : Parser Unit := do
|
||||
skipString "<?"
|
||||
<* PITarget <*
|
||||
optional (S *> manyChars (notFollowedBy (skipString "?>") *> Char))
|
||||
skipString "?>"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Misc -/
|
||||
def Misc : Parsec Unit :=
|
||||
def Misc : Parser Unit :=
|
||||
Comment *> pure () <|> PI <|> S *> pure ()
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-SystemLiteral -/
|
||||
def SystemLiteral : Parsec String :=
|
||||
def SystemLiteral : Parser String :=
  -- SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
  -- Both alternatives must consume their closing quote. The single-quoted branch previously
  -- ended in `pure '\''`, which left the closing `'` unread in the input.
  pchar '"' *> manyChars (satisfy λ c => c ≠ '"') <* pchar '"'
  <|> pchar '\'' *> manyChars (satisfy λ c => c ≠ '\'') <* pchar '\''
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PubidChar -/
|
||||
def PubidChar : Parsec LeanChar :=
|
||||
def PubidChar : Parser LeanChar :=
|
||||
asciiLetter <|> digit <|> endl <|> attempt do
|
||||
let c ← anyChar
|
||||
let c ← any
|
||||
if "-'()+,./:=?;!*#@$_%".contains c then pure c else fail "PublidChar expected"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PubidLiteral -/
|
||||
def PubidLiteral : Parsec String :=
|
||||
def PubidLiteral : Parser String :=
|
||||
pchar '"' *> manyChars PubidChar <* pchar '"'
|
||||
<|> pchar '\'' *> manyChars (attempt do
|
||||
let c ← PubidChar
|
||||
if c = '\'' then fail "'\\'' not expected" else pure c) <* pchar '\''
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-ExternalID -/
|
||||
def ExternalID : Parsec Unit :=
|
||||
def ExternalID : Parser Unit :=
|
||||
skipString "SYSTEM" *> S *> SystemLiteral *> pure ()
|
||||
<|> skipString "PUBLIC" *> S *> PubidLiteral *> S *> SystemLiteral *> pure ()
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Mixed -/
|
||||
def Mixed : Parsec Unit :=
|
||||
def Mixed : Parser Unit :=
|
||||
(do
|
||||
skipChar '('
|
||||
optional S *>
|
||||
@@ -175,11 +176,11 @@ def Mixed : Parsec Unit :=
|
||||
|
||||
mutual
|
||||
/-- https://www.w3.org/TR/xml/#NT-cp -/
|
||||
partial def cp : Parsec Unit :=
|
||||
partial def cp : Parser Unit :=
|
||||
(Name *> pure () <|> choice <|> seq) <* optional (skipChar '?' <|> skipChar '*' <|> skipChar '+')
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-choice -/
|
||||
partial def choice : Parsec Unit := do
|
||||
partial def choice : Parser Unit := do
|
||||
skipChar '('
|
||||
optional S *>
|
||||
cp
|
||||
@@ -188,7 +189,7 @@ mutual
|
||||
skipChar ')'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-seq -/
|
||||
partial def seq : Parsec Unit := do
|
||||
partial def seq : Parser Unit := do
|
||||
skipChar '('
|
||||
optional S *>
|
||||
cp
|
||||
@@ -198,15 +199,15 @@ mutual
|
||||
end
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-children -/
|
||||
def children : Parsec Unit :=
|
||||
def children : Parser Unit :=
|
||||
(choice <|> seq) <* optional (skipChar '?' <|> skipChar '*' <|> skipChar '+')
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-contentspec -/
|
||||
def contentspec : Parsec Unit := do
|
||||
def contentspec : Parser Unit := do
|
||||
skipString "EMPTY" <|> skipString "ANY" <|> Mixed <|> children
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-elementdecl -/
|
||||
def elementDecl : Parsec Unit := do
|
||||
def elementDecl : Parser Unit := do
|
||||
skipString "<!ELEMENT"
|
||||
S *>
|
||||
Name *>
|
||||
@@ -215,11 +216,11 @@ def elementDecl : Parsec Unit := do
|
||||
skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-StringType -/
|
||||
def StringType : Parsec Unit :=
|
||||
def StringType : Parser Unit :=
|
||||
skipString "CDATA"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-TokenizedType -/
|
||||
def TokenizedType : Parsec Unit :=
|
||||
def TokenizedType : Parser Unit :=
|
||||
skipString "ID"
|
||||
<|> skipString "IDREF"
|
||||
<|> skipString "IDREFS"
|
||||
@@ -229,7 +230,7 @@ def TokenizedType : Parsec Unit :=
|
||||
<|> skipString "NMTOKENS"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-NotationType -/
|
||||
def NotationType : Parsec Unit := do
|
||||
def NotationType : Parser Unit := do
|
||||
skipString "NOTATION"
|
||||
S *>
|
||||
skipChar '(' <*
|
||||
@@ -239,11 +240,11 @@ def NotationType : Parsec Unit := do
|
||||
skipChar ')'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Nmtoken -/
|
||||
def Nmtoken : Parsec String := do
|
||||
def Nmtoken : Parser String := do
|
||||
many1Chars NameChar
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Enumeration -/
|
||||
def Enumeration : Parsec Unit := do
|
||||
def Enumeration : Parser Unit := do
|
||||
skipChar '('
|
||||
optional S *>
|
||||
Nmtoken *> many (optional S *> skipChar '|' *> optional S *> Nmtoken) *>
|
||||
@@ -251,11 +252,11 @@ def Enumeration : Parsec Unit := do
|
||||
skipChar ')'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EnumeratedType -/
|
||||
def EnumeratedType : Parsec Unit :=
|
||||
def EnumeratedType : Parser Unit :=
|
||||
NotationType <|> Enumeration
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-AttType -/
|
||||
def AttType : Parsec Unit :=
|
||||
def AttType : Parser Unit :=
|
||||
StringType <|> TokenizedType <|> EnumeratedType
|
||||
|
||||
def predefinedEntityToChar : String → Option LeanChar
|
||||
@@ -267,7 +268,7 @@ def predefinedEntityToChar : String → Option LeanChar
|
||||
| _ => none
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EntityRef -/
|
||||
def EntityRef : Parsec $ Option LeanChar := attempt $
|
||||
def EntityRef : Parser $ Option LeanChar := attempt $
|
||||
skipChar '&' *> predefinedEntityToChar <$> Name <* skipChar ';'
|
||||
|
||||
@[inline]
|
||||
@@ -280,7 +281,7 @@ def digitsToNat (base : Nat) (digits : Array Nat) : Nat :=
|
||||
digits.foldl (λ r d => r * base + d) 0
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-CharRef -/
|
||||
def CharRef : Parsec LeanChar := do
|
||||
def CharRef : Parser LeanChar := do
|
||||
skipString "&#"
|
||||
let charCode ←
|
||||
digitsToNat 10 <$> many1 (hexDigitToNat <$> digit)
|
||||
@@ -289,11 +290,11 @@ def CharRef : Parsec LeanChar := do
|
||||
return Char.ofNat charCode
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Reference -/
|
||||
def Reference : Parsec $ Option LeanChar :=
|
||||
def Reference : Parser $ Option LeanChar :=
|
||||
EntityRef <|> some <$> CharRef
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-AttValue -/
|
||||
def AttValue : Parsec String := do
|
||||
def AttValue : Parser String := do
|
||||
let chars ←
|
||||
(do
|
||||
skipChar '"'
|
||||
@@ -306,25 +307,25 @@ def AttValue : Parsec String := do
|
||||
return chars.foldl (λ s c => if let some c := c then s.push c else s) ""
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-DefaultDecl -/
|
||||
def DefaultDecl : Parsec Unit :=
|
||||
def DefaultDecl : Parser Unit :=
|
||||
skipString "#REQUIRED"
|
||||
<|> skipString "#IMPLIED"
|
||||
<|> optional (skipString "#FIXED" <* S) *> AttValue *> pure ()
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-AttDef -/
|
||||
def AttDef : Parsec Unit :=
|
||||
def AttDef : Parser Unit :=
|
||||
S *> Name *> S *> AttType *> S *> DefaultDecl
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-AttlistDecl -/
|
||||
def AttlistDecl : Parsec Unit :=
|
||||
def AttlistDecl : Parser Unit :=
|
||||
skipString "<!ATTLIST" *> S *> Name *> many AttDef *> optional S *> skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PEReference -/
|
||||
def PEReference : Parsec Unit :=
|
||||
def PEReference : Parser Unit :=
|
||||
skipChar '%' *> Name *> skipChar ';'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EntityValue -/
|
||||
def EntityValue : Parsec String := do
|
||||
def EntityValue : Parser String := do
|
||||
let chars ←
|
||||
(do
|
||||
skipChar '"'
|
||||
@@ -338,51 +339,51 @@ def EntityValue : Parsec String := do
|
||||
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-NDataDecl -/
|
||||
def NDataDecl : Parsec Unit :=
|
||||
def NDataDecl : Parser Unit :=
|
||||
S *> skipString "NDATA" <* S <* Name
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EntityDef -/
|
||||
def EntityDef : Parsec Unit :=
|
||||
def EntityDef : Parser Unit :=
|
||||
EntityValue *> pure () <|> (ExternalID <* optional NDataDecl)
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-GEDecl -/
|
||||
def GEDecl : Parsec Unit :=
|
||||
def GEDecl : Parser Unit :=
|
||||
skipString "<!ENTITY" *> S *> Name *> S *> EntityDef *> optional S *> skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PEDef -/
|
||||
def PEDef : Parsec Unit :=
|
||||
def PEDef : Parser Unit :=
|
||||
EntityValue *> pure () <|> ExternalID
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PEDecl -/
|
||||
def PEDecl : Parsec Unit :=
|
||||
def PEDecl : Parser Unit :=
|
||||
skipString "<!ENTITY" *> S *> skipChar '%' *> S *> Name *> PEDef *> optional S *> skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EntityDecl -/
|
||||
def EntityDecl : Parsec Unit :=
|
||||
def EntityDecl : Parser Unit :=
|
||||
GEDecl <|> PEDecl
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-PublicID -/
|
||||
def PublicID : Parsec Unit :=
|
||||
def PublicID : Parser Unit :=
|
||||
skipString "PUBLIC" <* S <* PubidLiteral
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-NotationDecl -/
|
||||
def NotationDecl : Parsec Unit :=
|
||||
def NotationDecl : Parser Unit :=
|
||||
skipString "<!NOTATION" *> S *> Name *> (ExternalID <|> PublicID) *> optional S *> skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-markupdecl -/
|
||||
def markupDecl : Parsec Unit :=
|
||||
def markupDecl : Parser Unit :=
|
||||
elementDecl <|> AttlistDecl <|> EntityDecl <|> NotationDecl <|> PI <|> (Comment *> pure ())
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-DeclSep -/
|
||||
def DeclSep : Parsec Unit :=
|
||||
def DeclSep : Parser Unit :=
|
||||
PEReference <|> S *> pure ()
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-intSubset -/
|
||||
def intSubset : Parsec Unit :=
|
||||
def intSubset : Parser Unit :=
|
||||
many (markupDecl <|> DeclSep) *> pure ()
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-doctypedecl -/
|
||||
def doctypedecl : Parsec Unit := do
|
||||
def doctypedecl : Parser Unit := do
|
||||
skipString "<!DOCTYPE"
|
||||
S *>
|
||||
Name *>
|
||||
@@ -392,19 +393,19 @@ def doctypedecl : Parsec Unit := do
|
||||
skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-prolog -/
|
||||
def prolog : Parsec Unit :=
|
||||
def prolog : Parser Unit :=
|
||||
optional XMLdecl *>
|
||||
many Misc *>
|
||||
optional (doctypedecl <* many Misc) *> pure ()
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-Attribute -/
|
||||
def Attribute : Parsec (String × String) := do
|
||||
def Attribute : Parser (String × String) := do
|
||||
let name ← Name
|
||||
Eq
|
||||
let value ← AttValue
|
||||
return (name, value)
|
||||
|
||||
protected def elementPrefix : Parsec (Array Content → Element) := do
|
||||
protected def elementPrefix : Parser (Array Content → Element) := do
|
||||
skipChar '<'
|
||||
let name ← Name
|
||||
let attributes ← many (attempt <| S *> Attribute)
|
||||
@@ -412,40 +413,40 @@ protected def elementPrefix : Parsec (Array Content → Element) := do
|
||||
return Element.Element name (RBMap.fromList attributes.toList compare)
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-EmptyElemTag -/
|
||||
def EmptyElemTag (elem : Array Content → Element) : Parsec Element := do
|
||||
def EmptyElemTag (elem : Array Content → Element) : Parser Element := do
|
||||
skipString "/>" *> pure (elem #[])
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-STag -/
|
||||
def STag (elem : Array Content → Element) : Parsec (Array Content → Element) := do
|
||||
def STag (elem : Array Content → Element) : Parser (Array Content → Element) := do
|
||||
skipChar '>' *> pure elem
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-ETag -/
|
||||
def ETag : Parsec Unit :=
|
||||
def ETag : Parser Unit :=
|
||||
skipString "</" *> Name *> optional S *> skipChar '>'
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-CDStart -/
|
||||
def CDStart : Parsec Unit :=
|
||||
def CDStart : Parser Unit :=
|
||||
skipString "<![CDATA["
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-CDEnd -/
|
||||
def CDEnd : Parsec Unit :=
|
||||
def CDEnd : Parser Unit :=
|
||||
skipString "]]>"
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-CData -/
|
||||
def CData : Parsec String :=
|
||||
manyChars (notFollowedBy (skipString "]]>") *> anyChar)
|
||||
def CData : Parser String :=
|
||||
manyChars (notFollowedBy (skipString "]]>") *> any)
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-CDSect -/
|
||||
def CDSect : Parsec String :=
|
||||
def CDSect : Parser String :=
|
||||
CDStart *> CData <* CDEnd
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-CharData -/
|
||||
def CharData : Parsec String :=
|
||||
def CharData : Parser String :=
|
||||
notFollowedBy (skipString "]]>") *> manyChars (satisfy λ c => c ≠ '<' ∧ c ≠ '&')
|
||||
|
||||
mutual
|
||||
/-- https://www.w3.org/TR/xml/#NT-content -/
|
||||
partial def content : Parsec (Array Content) := do
|
||||
partial def content : Parser (Array Content) := do
|
||||
let x ← optional (Content.Character <$> CharData)
|
||||
let xs ← many do
|
||||
let y ←
|
||||
@@ -468,20 +469,20 @@ mutual
|
||||
return res
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-element -/
|
||||
partial def element : Parsec Element := do
|
||||
partial def element : Parser Element := do
|
||||
let elem ← Parser.elementPrefix
|
||||
EmptyElemTag elem <|> STag elem <*> content <* ETag
|
||||
|
||||
end
|
||||
|
||||
/-- https://www.w3.org/TR/xml/#NT-document -/
|
||||
def document : Parsec Element := prolog *> element <* many Misc <* eof
|
||||
def document : Parser Element := prolog *> element <* many Misc <* eof
|
||||
|
||||
end Parser
|
||||
|
||||
def parse (s : String) : Except String Element :=
|
||||
match Xml.Parser.document s.mkIterator with
|
||||
| Parsec.ParseResult.success _ res => Except.ok res
|
||||
| Parsec.ParseResult.error it err => Except.error s!"offset {it.i.byteIdx.repr}: {err}\n{(it.prevn 10).extract it}"
|
||||
| .success _ res => Except.ok res
|
||||
| .error it err => Except.error s!"offset {it.i.byteIdx.repr}: {err}\n{(it.prevn 10).extract it}"
|
||||
|
||||
end Xml
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import Lean.Data.Parsec
|
||||
open Lean Parsec
|
||||
open Lean Parsec String
|
||||
|
||||
@[macro_inline] -- Error
|
||||
def f : Nat → Nat
|
||||
@@ -19,7 +19,7 @@ def h : Nat → Nat → Nat
|
||||
termination_by x y => x
|
||||
|
||||
@[macro_inline] -- Error
|
||||
partial def skipMany (p : Parsec α) (it : String.Iterator) : Parsec PUnit := do
|
||||
partial def skipMany (p : Parser α) (it : String.Iterator) : Parser PUnit := do
|
||||
match p it with
|
||||
| .success it _ => skipMany p it
|
||||
| .error _ _ => pure ()
|
||||
|
||||
@@ -57,12 +57,16 @@ def Lean.Widget.GetWidgetsResponse.debugJson (r : Widget.GetWidgetsResponse) : J
|
||||
)
|
||||
]
|
||||
|
||||
def word : Parsec String :=
|
||||
Parsec.many1Chars <| Parsec.digit <|> Parsec.asciiLetter <|> Parsec.pchar '_'
|
||||
open Parsec in
|
||||
open Parsec.String in
|
||||
def word : Parser String :=
|
||||
many1Chars <| digit <|> asciiLetter <|> pchar '_'
|
||||
|
||||
def ident : Parsec Name := do
|
||||
open Parsec in
|
||||
open Parsec.String in
|
||||
def ident : Parser Name := do
|
||||
let head ← word
|
||||
let xs ← Parsec.many1 (Parsec.pchar '.' *> word)
|
||||
let xs ← many1 (pchar '.' *> word)
|
||||
return xs.foldl .str $ .mkSimple head
|
||||
|
||||
partial def main (args : List String) : IO Unit := do
|
||||
|
||||
Reference in New Issue
Block a user