Files
lean4/tests/playground/flat_parser.lean
2022-10-19 09:28:08 -07:00

395 lines
14 KiB
Lean4
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import init.lean.message init.lean.parser.syntax init.lean.parser.trie init.lean.parser.basic init.lean.parser.stringliteral
import init.lean.parser.token
namespace Lean
namespace flatParser
open String
open Parser (Syntax Syntax.missing)
open Parser (Trie TokenMap)
abbreviation pos := String.Pos
/-- A precomputed cache for quickly mapping Char offsets to positions. -/
structure FileMap :=
(offsets : Array Nat)
(lines : Array Nat)
namespace FileMap
private def fromStringAux (s : String) : Nat Nat Nat pos Array Nat Array Nat FileMap
| 0 offset line i offsets lines := offsets.push offset, lines.push line
| (k+1) offset line i offsets lines :=
if s.atEnd i then offsets.push offset, lines.push line
else let c := s.get i in
let i := s.next i in
let offset := offset + 1 in
if c = '\n'
then fromStringAux k offset (line+1) i (offsets.push offset) (lines.push (line+1))
else fromStringAux k offset line i offsets lines
def fromString (s : String) : FileMap :=
fromStringAux s s.length 0 1 0 (Array.empty.push 0) (Array.empty.push 1)
/- Remark: `offset is in [(offsets.get b), (offsets.get e)]` and `b < e` -/
private def toPositionAux (offsets : Array Nat) (lines : Array Nat) (offset : Nat) : Nat Nat Nat Position
| 0 b e := offset, 1 -- unreachable
| (k+1) b e :=
let offsetB := offsets.get b in
if e = b + 1 then offset - offsetB, lines.get b
else let m := (b + e) / 2 in
let offsetM := offsets.get m in
if offset = offsetM then 0, lines.get m
else if offset > offsetM then toPositionAux k m e
else toPositionAux k b m
def toPosition : FileMap Nat Position
| offsets, lines offset := toPositionAux offsets lines offset offsets.size 0 (offsets.size-1)
end FileMap
structure TokenConfig :=
(«prefix» : String)
(lbp : Nat := 0)
structure FrontendConfig :=
(filename : String)
(input : String)
(FileMap : FileMap)
/- Remark: if we have a Node in the Trie with `some TokenConfig`, the String induced by the path is equal to the `TokenConfig.prefix`. -/
structure ParserConfig extends FrontendConfig :=
(tokens : Trie TokenConfig)
-- Backtrackable State
structure ParserState :=
(messages : MessageLog)
structure TokenCacheEntry :=
(startPos stopPos : pos)
(tk : Syntax)
-- Non-backtrackable State
structure ParserCache :=
(tokenCache : Option TokenCacheEntry := none)
inductive Result (α : Type)
| ok (a : α) (i : pos) (cache : ParserCache) (State : ParserState) (eps : Bool) : Result
| error {} (msg : String) (i : pos) (cache : ParserCache) (stx : Syntax) (eps : Bool) : Result
inductive Result.IsOk {α : Type} : Result α Prop
| mk (a : α) (i : pos) (cache : ParserCache) (State : ParserState) (eps : Bool) : Result.IsOk (Result.ok a i cache State eps)
theorem errorIsNotOk {α : Type} {msg : String} {i : pos} {cache : ParserCache} {stx : Syntax} {eps : Bool}
(h : Result.IsOk (@Result.error α msg i cache stx eps)) : False :=
match h with end
@[inline] def unreachableError {α β : Type} {msg : String} {i : pos} {cache : ParserCache} {stx : Syntax} {eps : Bool}
(h : Result.IsOk (@Result.error α msg i cache stx eps)) : β :=
False.elim (errorIsNotOk h)
def resultOk := {r : Result Unit // r.IsOk}
@[inline] def mkResultOk (i : pos) (cache : ParserCache) (State : ParserState) (eps := true) : resultOk :=
Result.ok () i cache State eps, Result.IsOk.mk _ _ _ _ _
def parserCoreM (α : Type) :=
ParserConfig resultOk Result α
abbreviation parserCore := parserCoreM Syntax
structure recParsers :=
(cmdParser : parserCore)
(termParser : Nat parserCore)
def parserM (α : Type) := recParsers parserCoreM α
abbreviation Parser := parserM Syntax
abbreviation trailingParser := Syntax Parser
@[inline] def command.Parser : Parser := λ ps, ps.cmdParser
@[inline] def Term.Parser (rbp : Nat := 0) : Parser := λ ps, ps.termParser rbp
@[inline] def parserM.pure {α : Type} (a : α) : parserM α :=
λ _ _ r,
match r with
| Result.ok _ it c s _, h := Result.ok a it c s true
| Result.error _ _ _ _ _, h := unreachableError h
@[inline_if_reduce] def eagerOr (b₁ b₂ : Bool) := b₁ || b₂
@[inline_if_reduce] def eagerAnd (b₁ b₂ : Bool) := b₁ && b₂
@[inline] def parserM.bind {α β : Type} (x : parserM α) (f : α parserM β) : parserM β :=
λ ps cfg r,
match x ps cfg r with
| Result.ok a i c s e₁ :=
(match f a ps cfg (mkResultOk i c s) with
| Result.ok b i c s e₂ := Result.ok b i c s (eagerAnd e₁ e₂)
| Result.error msg i c stx e₂ := Result.error msg i c stx (eagerAnd e₁ e₂))
| Result.error msg i c stx e := Result.error msg i c stx e
instance : Monad parserM :=
{pure := @parserM.pure, bind := @parserM.bind}
@[inline] protected def orelse {α : Type} (p q : parserM α) : parserM α :=
λ ps cfg r,
match r with
| Result.ok _ i₁ _ s₁ _, _ :=
(match p ps cfg r with
| Result.error msg₁ i₂ c₂ stx₁ true := q ps cfg (mkResultOk i₁ c₂ s₁)
| other := other)
| Result.error _ _ _ _ _, h := unreachableError h
@[inline] protected def failure {α : Type} : parserM α :=
λ _ _ r,
match r with
| Result.ok _ i c s _, h := Result.error "failure" i c Syntax.missing true
| Result.error _ _ _ _ _, h := unreachableError h
instance : Alternative parserM :=
{ orelse := @flatParser.orelse,
failure := @flatParser.failure,
..flatParser.Monad }
def setSilentError {α : Type} : Result α Result α
| (Result.error i c msg stx _) := Result.error i c msg stx true
| other := other
/--
`try p` behaves like `p`, but it pretends `p` hasn't
consumed any input when `p` fails.
-/
@[inline] def try {α : Type} (p : parserM α) : parserM α :=
λ ps cfg r, setSilentError (p ps cfg r)
@[inline] def atEnd (cfg : ParserConfig) (i : pos) : Bool :=
cfg.input.atEnd i
@[inline] def curr (cfg : ParserConfig) (i : pos) : Char :=
cfg.input.get i
@[inline] def next (cfg : ParserConfig) (i : pos) : pos :=
cfg.input.next i
@[inline] def inputSize (cfg : ParserConfig) : Nat :=
cfg.input.length
@[inline] def currPos : resultOk pos
| Result.ok _ i _ _ _, _ := i
| Result.error _ _ _ _ _, h := unreachableError h
@[inline] def currState : resultOk ParserState
| Result.ok _ _ _ s _, _ := s
| Result.error _ _ _ _ _, h := unreachableError h
def mkError {α : Type} (r : resultOk) (msg : String) (stx : Syntax := Syntax.missing) (eps := true) : Result α :=
match r with
| Result.ok _ i c s _, _ := Result.error msg i c stx eps
| Result.error _ _ _ _ _, h := unreachableError h
@[inline] def satisfy (p : Char Bool) : parserM Char :=
λ _ cfg r,
match r with
| Result.ok _ i ch st e, _ :=
if atEnd cfg i then mkError r "end of input"
else let c := curr cfg i in
if p c then Result.ok c (next cfg i) ch st false
else mkError r "unexpected character"
| Result.error _ _ _ _ _, h := unreachableError h
def any : parserM Char :=
satisfy (λ _, true)
@[specialize] def takeUntilAux (p : Char Bool) (cfg : ParserConfig) : Nat resultOk Result Unit
| 0 r := r.val
| (n+1) r :=
match r with
| Result.ok _ i ch st e, _ :=
if atEnd cfg i then r.val
else let c := curr cfg i in
if p c then r.val
else takeUntilAux n (mkResultOk (next cfg i) ch st true)
| Result.error _ _ _ _ _, h := unreachableError h
@[specialize] def takeUntil (p : Char Bool) : parserM Unit :=
λ ps cfg r, takeUntilAux p cfg (inputSize cfg) r
def takeUntilNewLine : parserM Unit :=
takeUntil (= '\n')
def whitespace : parserM Unit :=
takeUntil (λ c, !c.isWhitespace)
-- setOption Trace.Compiler.boxed True
--- setOption pp.implicit True
def strAux (cfg : ParserConfig) (str : String) (error : String) : Nat resultOk pos Result Unit
| 0 r j := mkError r error
| (n+1) r j :=
if str.atEnd j then r.val
else
match r with
| Result.ok _ i ch st e, _ :=
if atEnd cfg i then Result.error error i ch Syntax.missing true
else if curr cfg i = str.get j then strAux n (mkResultOk (next cfg i) ch st true) (str.next j)
else Result.error error i ch Syntax.missing true
| Result.error _ _ _ _ _, h := unreachableError h
-- #exit
@[inline] def str (s : String) : parserM Unit :=
λ ps cfg r, strAux cfg s ("expected " ++ repr s) (inputSize cfg) r 0
@[specialize] def manyAux (p : parserM Unit) : Nat Bool parserM Unit
| 0 fst := pure ()
| (k+1) fst := λ ps cfg r,
let i₀ := currPos r in
let s₀ := currState r in
match p ps cfg r with
| Result.ok a i c s _ := manyAux k false ps cfg (mkResultOk i c s)
| Result.error _ _ c _ _ := Result.ok () i₀ c s₀ fst
@[inline] def many (p : parserM Unit) : parserM Unit :=
λ ps cfg r, manyAux p (inputSize cfg) true ps cfg r
@[inline] def many1 (p : parserM Unit) : parserM Unit :=
p *> many p
def dummyParserCore : parserCore :=
λ cfg r, mkError r "dummy"
def testParser {α : Type} (x : parserM α) (input : String) : String :=
let r :=
x { cmdParser := dummyParserCore, termParser := λ _, dummyParserCore }
{ filename := "test", input := input, FileMap := FileMap.fromString input, tokens := Lean.Parser.Trie.empty }
(mkResultOk 0 {} {messages := MessageLog.empty}) in
match r with
| Result.ok _ i _ _ _ := "Ok at " ++ toString i
| Result.error msg i _ _ _ := "Error at " ++ toString i ++ ": " ++ msg
/-
mutual def recCmd, recTerm (parseCmd : Parser) (parseTerm : Nat → Parser) (parseLvl : Nat → parserCore)
with recCmd : Nat → parserCore
| 0 cfg r := mkError r "Parser: no progress"
| (n+1) cfg r := parseCmd ⟨recCmd n, parseLvl, recTerm n⟩ cfg r
with recTerm : Nat → Nat → parserCore
| 0 rbp cfg r := mkError r "Parser: no progress"
| (n+1) rbp cfg r := parseTerm rbp ⟨recCmd n, parseLvl, recTerm n⟩ cfg r
-/
/-
def runParser (x : Parser) (parseCmd : Parser) (parseLvl : Nat → Parser) (parseTerm : Nat → Parser)
(input : Iterator) (cfg : ParserConfig) : Result Syntax :=
let it := input in
let n := it.remaining in
let r := mkResultOk it {} {messages := MessageLog.Empty} in
let pl := recLvl (parseLvl) n in
let ps : recParsers := { cmdParser := recCmd parseCmd parseTerm pl n,
lvlParser := pl,
termParser := recTerm parseCmd parseTerm pl n } in
x ps cfg r
-/
structure parsingTables :=
(leadingTermParsers : TokenMap Parser)
(trailingTermParsers : TokenMap trailingParser)
abbreviation CommandParserM (α : Type) :=
parsingTables parserM α
end flatParser
end Lean
section
open Lean.flatParser
def flatP : parserM Unit :=
many1 (str "++" <|> str "**" <|> (str "--" *> takeUntil (= '\n') *> any *> pure ()))
end
section
open Lean.Parser
open Lean.Parser.MonadParsec
@[reducible] def Parser (α : Type) : Type := ReaderT Lean.flatParser.recParsers (ReaderT Lean.flatParser.ParserConfig (ParsecT Syntax (StateT ParserCache Id))) α
def testParsec (p : Parser Unit) (input : String) : String :=
let ps : Lean.flatParser.recParsers := { cmdParser := Lean.flatParser.dummyParserCore, termParser := λ _, Lean.flatParser.dummyParserCore } in
let cfg : Lean.flatParser.ParserConfig := { filename := "test", input := input, FileMap := Lean.flatParser.FileMap.fromString input, tokens := Lean.Parser.Trie.empty } in
let r := p ps cfg input.mkOldIterator {} in
match r with
| (Parsec.Result.ok _ it _, _) := "OK at " ++ toString it.offset
| (Parsec.Result.error msg _, _) := "Error " ++ msg.toString
@[inline] def str' (s : String) : Parser Unit :=
str s *> pure ()
def parsecP : Parser Unit :=
many1' (str' "++" <|> str' "**" <|> (str "--" *> takeUntil (λ c, c = '\n') *> any *> pure ()))
def parsecP2 : Parser Unit :=
many1' ((parseStringLiteral *> whitespace *> pure ()) <|> (str "--" *> takeUntil (λ c, c = '\n') *> any *> pure ()))
end
namespace BasicParser
open Lean.Parser
open Lean.Parser.MonadParsec
def testBasicParser (p : BasicParserM Unit) (input : String) : String :=
let cfg : Lean.Parser.ParserConfig := {
filename := "test", input := input, fileMap := { lines := {} }, tokens := Lean.Parser.Trie.empty } in
let r := p cfg input.mkOldIterator {} in
match r with
| (Parsec.Result.ok _ it _, _) := "OK at " ++ toString it.offset
| (Parsec.Result.error msg _, _) := "Error " ++ msg.toString
@[inline] def str' (s : String) : BasicParserM Unit :=
str s *> pure ()
def parserP : BasicParserM Unit :=
many1' (str' "++" <|> str' "**" <|> (str "--" *> takeUntil (λ c, c = '\n') *> any *> pure ()))
def parser2 : BasicParserM Unit :=
many1' ((parseStringLiteral *> Lean.Parser.MonadParsec.whitespace *> pure ()) <|> (str "--" *> takeUntil (λ c, c = '\n') *> any *> pure ()))
def parser3 : BasicParserM Unit :=
Lean.Parser.whitespace
end BasicParser
def mkBigString : Nat String String
| 0 s := s
| (n+1) s := mkBigString n (s ++ "-- new comment\n")
def mkBigString2 : Nat String String
| 0 s := s
| (n+1) s := mkBigString2 n (s ++ "\"hello\\nworld\"\n-- comment\n")
def mkBigString3 : Nat String String
| 0 s := s
| (n+1) s := mkBigString3 n (s ++ "/- /- comment 1 -/ -/ \n -- comment 2 \n \t \n ")
@[noinline] def testFlatP (s : String) : IO Unit :=
IO.println (Lean.flatParser.testParser flatP s)
@[noinline] def testParsecP (p : Parser Unit) (s : String) : IO Unit :=
IO.println (testParsec p s)
@[noinline] def testBasicParser (p : Lean.Parser.BasicParserM Unit) (s : String) : IO Unit :=
IO.println (BasicParser.testBasicParser p s)
@[noinline] def prof {α : Type} (msg : String) (p : IO α) : IO α :=
let msg₁ := "Time for '" ++ msg ++ "':" in
let msg₂ := "Memory usage for '" ++ msg ++ "':" in
allocprof msg₂ (timeit msg₁ p)
def main (xs : List String) : IO Unit :=
-- let s₁ := mkBigString xs.head.toNat "" in
-- let s₂ := s₁ ++ "bad" ++ mkBigString 20 "" in
-- let s₃ := mkBigString2 xs.head.toNat "" in
let s₄ := mkBigString3 xs.head.toNat "" in
-- prof "flat Parser 1" (testFlatP s₁) *>
-- prof "flat Parser 2" (testFlatP s₂) *>
-- prof "Parsec 1" (testParsecP parsecP s₁) *>
-- prof "Parsec 2" (testParsecP parsecP s₂) *>
-- prof "Parsec 3" (testParsecP parsecP2 s₃) *>
prof "Basic parser 1" (testBasicParser BasicParser.parser3 s₄)