mirror of
https://github.com/leanprover/lean4.git
synced 2026-03-17 18:34:06 +00:00
feat: leading whitespace on first token (#12662)
This PR adjusts the module parser to set the leading whitespace of the first token to the whitespace up to that token. If there are no actual tokens in the file, the leading whitespace is set on the final (empty) EOI token. This ensures that we do not lose the initial whitespace (e.g. comments) of a file in `Syntax`. (Tests generated/adjusted by Claude) Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -917,7 +917,11 @@ def lean (name : Option Ident := none) (error warning : flag false) («show» :
|
||||
let scopes := (← get).scopes
|
||||
let (cmds, cmdState, trees) ← withSaveInfoContext do
|
||||
let mut cmdState : Command.State := { env, maxRecDepth := ← MonadRecDepth.getMaxRecDepth, scopes }
|
||||
let mut pstate : Parser.ModuleParserState := {pos := pos, recovering := false}
|
||||
let mut pstate : Parser.ModuleParserState := {
|
||||
pos
|
||||
recovering := false
|
||||
hasLeading := false
|
||||
}
|
||||
let mut cmds := #[]
|
||||
repeat
|
||||
let scope := cmdState.scopes.head!
|
||||
|
||||
@@ -26,7 +26,12 @@ end Module
|
||||
|
||||
structure ModuleParserState where
|
||||
pos : String.Pos.Raw := 0
|
||||
recovering : Bool := false
|
||||
recovering : Bool := false
|
||||
/--
|
||||
Whether there is leading whitespace before `pos`.
|
||||
Used to associate whitespace at the start of the file with the first token of the file.
|
||||
-/
|
||||
hasLeading : Bool := true
|
||||
deriving Inhabited
|
||||
|
||||
private partial def mkErrorMessage (c : InputContext) (pos : String.Pos.Raw) (stk : SyntaxStack) (e : Parser.Error) : Message := Id.run do
|
||||
@@ -60,6 +65,12 @@ where
|
||||
if let .original (trailing := trailing) .. := stx.getTailInfo then pure (some trailing)
|
||||
else none
|
||||
|
||||
private def setStartOfFileLeading (stx : Syntax) : Syntax × Bool := Id.run do
|
||||
let some (.original leading pos trailing endPos) ← stx.getHeadInfo?
|
||||
| return (stx, false)
|
||||
let info := .original { leading with startPos := 0 } pos trailing endPos
|
||||
return (stx.setHeadInfo info, true)
|
||||
|
||||
def parseHeader (inputCtx : InputContext) : IO (TSyntax ``Module.header × ModuleParserState × MessageLog) := do
|
||||
let dummyEnv ← mkEmptyEnvironment
|
||||
let p := andthenFn whitespace Module.header.fn
|
||||
@@ -96,10 +107,12 @@ def parseHeader (inputCtx : InputContext) : IO (TSyntax ``Module.header × Modul
|
||||
consider using separate `public import {mod}` and `import all {mod}` directives \
|
||||
in order to import public data into the public scope and private data into the \
|
||||
private scope."
|
||||
pure (⟨stx⟩, {pos := s.pos, recovering := s.hasError}, messages)
|
||||
let (stx, isLeadingSet) := setStartOfFileLeading stx
|
||||
pure (⟨stx⟩, { pos := s.pos, recovering := s.hasError, hasLeading := ! isLeadingSet }, messages)
|
||||
|
||||
private def mkEOI (pos : String.Pos.Raw) : Syntax :=
|
||||
let atom := mkAtom (SourceInfo.original "".toRawSubstring pos "".toRawSubstring pos) ""
|
||||
private def mkEOI (inputCtx : InputContext) (pos : String.Pos.Raw) : Syntax :=
|
||||
let emptyAt := inputCtx.substring pos pos
|
||||
let atom := mkAtom (SourceInfo.original emptyAt pos emptyAt pos) ""
|
||||
mkNode ``Command.eoi #[atom]
|
||||
|
||||
def isTerminalCommand (s : Syntax) : Bool :=
|
||||
@@ -122,7 +135,7 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext)
|
||||
let mut stx := Syntax.missing -- will always be assigned below
|
||||
repeat
|
||||
if inputCtx.atEnd pos then
|
||||
stx := mkEOI pos
|
||||
stx := mkEOI inputCtx pos
|
||||
break
|
||||
let pos' := pos
|
||||
let p := andthenFn whitespace topLevelCommandParserFn
|
||||
@@ -155,7 +168,10 @@ partial def parseCommand (inputCtx : InputContext) (pmctx : ParserModuleContext)
|
||||
else
|
||||
stx := s.stxStack.back
|
||||
break
|
||||
return (stx, { pos, recovering }, messages)
|
||||
if mps.hasLeading then
|
||||
-- File header was empty - set start-of-file leading whitespace on first token in `stx`
|
||||
(stx, _) := setStartOfFileLeading stx
|
||||
return (stx, { pos, recovering, hasLeading := false }, messages)
|
||||
|
||||
-- only useful for testing since most Lean files cannot be parsed without elaboration
|
||||
|
||||
@@ -165,7 +181,7 @@ partial def testParseModuleAux (env : Environment) (inputCtx : InputContext) (s
|
||||
| (stx, state, msgs) =>
|
||||
if isTerminalCommand stx then
|
||||
if !msgs.hasUnreported then
|
||||
pure stxs
|
||||
pure (stxs.push stx)
|
||||
else do
|
||||
msgs.forM fun msg => msg.toString >>= IO.println
|
||||
throw (IO.userError "failed to parse file")
|
||||
@@ -173,14 +189,14 @@ partial def testParseModuleAux (env : Environment) (inputCtx : InputContext) (s
|
||||
parse state msgs (stxs.push stx)
|
||||
parse s msgs stxs
|
||||
|
||||
def testParseModule (env : Environment) (fname contents : String) : IO (TSyntax ``Parser.Module.module) := do
|
||||
def testParseModule (env : Environment) (fname contents : String) : IO Syntax := do
|
||||
let inputCtx := mkInputContext contents fname
|
||||
let (header, state, messages) ← parseHeader inputCtx
|
||||
let cmds ← testParseModuleAux env inputCtx state messages #[]
|
||||
let stx := mkNode `Lean.Parser.Module.module #[header, mkListNode cmds]
|
||||
pure ⟨stx.raw.updateLeading⟩
|
||||
pure stx
|
||||
|
||||
def testParseFile (env : Environment) (fname : System.FilePath) : IO (TSyntax ``Parser.Module.module) := do
|
||||
def testParseFile (env : Environment) (fname : System.FilePath) : IO Syntax := do
|
||||
let contents ← IO.FS.readFile fname
|
||||
testParseModule env fname.toString contents
|
||||
|
||||
|
||||
@@ -12,20 +12,27 @@ unsafe def main (args : List String) : IO Unit := do
|
||||
| [f] => (false, f)
|
||||
| _ => panic! "usage: file [-d]"
|
||||
let env ← mkEmptyEnvironment
|
||||
let stx ← Lean.Parser.testParseFile env fn
|
||||
let stx₀ ← Lean.Parser.testParseFile env fn
|
||||
-- `testParseFile` matches the real pipeline (no `updateLeading`), but reformatting
|
||||
-- needs leading info to correctly place inter-declaration comments, just like
|
||||
-- `script/reformat.lean` which explicitly calls `updateLeading`.
|
||||
let stx : Syntax := stx₀.updateLeading
|
||||
let act : CoreM Format := do
|
||||
withOptions (fun opts =>
|
||||
opts
|
||||
-- Name sanitization clears inline comments attached to identifiers.
|
||||
|>.set `pp.sanitizeNames false
|
||||
|>.set `trace.PrettyPrinter.format debug) do
|
||||
tryFinally (PrettyPrinter.ppModule stx) printTraces
|
||||
tryFinally (PrettyPrinter.ppModule ⟨stx⟩) printTraces
|
||||
let (f, _) ← act.toIO { fileName := "", fileMap := default } { env := env }
|
||||
-- `testParseModule` includes the EOI token in the command list (matching the real
|
||||
-- pipeline), so `ppModule` emits two trailing newlines via the `ppLine >> ppLine`
|
||||
-- separator for EOI's empty command text.
|
||||
IO.print f
|
||||
let stx' ← Lean.Parser.testParseModule env fn (toString f)
|
||||
if stx' != stx then
|
||||
let stx := stx.raw.getArg 1
|
||||
let stx' := stx'.raw.getArg 1
|
||||
let stx := stx.getArg 1
|
||||
let stx' := stx'.getArg 1
|
||||
stx.getArgs.size.forM fun i _ => do
|
||||
if stx.getArg i != stx'.getArg i then
|
||||
throw $ IO.userError s!"reparsing failed:\n{stx.getArg i}\n{stx'.getArg i}"
|
||||
|
||||
@@ -326,3 +326,5 @@ instance {α : Type u} [DecidableEq α] : BEq α :=
|
||||
def dite {α : Sort u} (c : Prop) [h : Decidable c] (t : c → α) (e : Not c → α) : α :=
|
||||
Decidable.casesOn (motive := fun _ => α) h e t
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -33,10 +33,13 @@ let (debug, f) : Bool × String := match args with
|
||||
| _ => panic! "usage: file [-d]";
|
||||
let env ← mkEmptyEnvironment;
|
||||
let stx ← Lean.Parser.testParseFile env args.head!;
|
||||
let header := stx.raw.getArg 0;
|
||||
-- `testParseFile` matches the real pipeline (no `updateLeading`), but reprinting
|
||||
-- needs leading info to correctly place inter-declaration whitespace.
|
||||
let stx := stx.updateLeading;
|
||||
let header := stx.getArg 0;
|
||||
let some s ← pure header.reprint | throw $ IO.userError "header reprint failed";
|
||||
IO.print s;
|
||||
let cmds := (stx.raw.getArg 1).getArgs;
|
||||
let cmds := (stx.getArg 1).getArgs;
|
||||
cmds.forM $ fun cmd => do
|
||||
let cmd := unparen cmd;
|
||||
let (cmd, _) ← (tryFinally (PrettyPrinter.parenthesizeCommand cmd) printTraces).toIO { options := Options.empty.set `trace.PrettyPrinter.parenthesize debug, fileName := "", fileMap := default } { env := env };
|
||||
|
||||
95
tests/lean/run/moduleHeaderLeadingWhitespace.lean
Normal file
95
tests/lean/run/moduleHeaderLeadingWhitespace.lean
Normal file
@@ -0,0 +1,95 @@
|
||||
-- Tests that file-start whitespace and comments are associated with the leading
|
||||
-- of the first actual token in the file (in the module header or first command).
|
||||
import Lean
|
||||
|
||||
open Lean Parser
|
||||
|
||||
/-- Parse a header string and return the leading Substring.Raw of the first token. -/
|
||||
def getFirstTokenLeading (input : String) : IO (Option Substring.Raw) := do
|
||||
let inputCtx := mkInputContext input "<test>"
|
||||
let (header, _, _) ← parseHeader inputCtx
|
||||
return match header.raw.getHeadInfo? with
|
||||
| some (.original leading ..) => some leading
|
||||
| _ => none
|
||||
|
||||
-- Test 1: file-start line comment before import
|
||||
-- The leading of the `import` keyword should start at position 0 and contain the comment.
|
||||
/--
|
||||
info: leading startPos: 0
|
||||
leading content: -- file-start comment
|
||||
-/
|
||||
#guard_msgs in
|
||||
#eval do
|
||||
let some leading ← getFirstTokenLeading "-- file-start comment\nimport Foo\n"
|
||||
| IO.println "no leading found"
|
||||
IO.println s!"leading startPos: {leading.startPos.byteIdx}"
|
||||
IO.println s!"leading content: {Substring.Raw.toString leading}"
|
||||
|
||||
-- Test 2: file-start block comment before import
|
||||
/--
|
||||
info: leading startPos: 0
|
||||
leading content: /- block comment -/
|
||||
-/
|
||||
#guard_msgs in
|
||||
#eval do
|
||||
let some leading ← getFirstTokenLeading "/- block comment -/\nimport Foo\n"
|
||||
| IO.println "no leading found"
|
||||
IO.println s!"leading startPos: {leading.startPos.byteIdx}"
|
||||
IO.println s!"leading content: {Substring.Raw.toString leading}"
|
||||
|
||||
-- Test 3: no file-start whitespace (import is the very first thing)
|
||||
-- The leading should start at position 0 with stopPos 0 (empty leading).
|
||||
/--
|
||||
info: leading startPos: 0
|
||||
leading stopPos: 0 -/
|
||||
#guard_msgs in
|
||||
#eval do
|
||||
let some leading ← getFirstTokenLeading "import Foo\n"
|
||||
| IO.println "no leading found"
|
||||
IO.println s!"leading startPos: {leading.startPos.byteIdx}"
|
||||
IO.println s!"leading stopPos: {leading.stopPos.byteIdx}"
|
||||
|
||||
-- Test 4: file with only newlines at the start (no comments)
|
||||
/--
|
||||
info: leading startPos: 0
|
||||
leading content:
|
||||
|
||||
-/
|
||||
#guard_msgs in
|
||||
#eval do
|
||||
let some leading ← getFirstTokenLeading "\n\nimport Foo\n"
|
||||
| IO.println "no leading found"
|
||||
IO.println s!"leading startPos: {leading.startPos.byteIdx}"
|
||||
IO.println s!"leading content: {Substring.Raw.toString leading}"
|
||||
|
||||
-- Test 5: entirely empty file (only a comment, no header or body)
|
||||
-- The EOI token's leading should include the file-start comment.
|
||||
/--
|
||||
info: first token leading startPos: 0
|
||||
first token leading content: -- just a comment
|
||||
-/
|
||||
#guard_msgs in
|
||||
#eval do
|
||||
let env ← mkEmptyEnvironment
|
||||
let stx ← testParseModule env "<test>" "-- just a comment\n"
|
||||
match stx.getHeadInfo? with
|
||||
| some (.original leading ..) =>
|
||||
IO.println s!"first token leading startPos: {leading.startPos.byteIdx}"
|
||||
IO.println s!"first token leading content: {Substring.Raw.toString leading}"
|
||||
| _ => IO.println "no original leading found"
|
||||
|
||||
-- Test 6: empty header (no imports) - file-start comment before a def
|
||||
-- The leading of the first command token should include the file-start comment.
|
||||
/--
|
||||
info: first token leading startPos: 0
|
||||
first token leading content: -- file-start comment
|
||||
-/
|
||||
#guard_msgs in
|
||||
#eval do
|
||||
let env ← mkEmptyEnvironment
|
||||
let stx ← testParseModule env "<test>" "-- file-start comment\ndef foo := 1\n"
|
||||
match stx.getHeadInfo? with
|
||||
| some (.original leading ..) =>
|
||||
IO.println s!"first token leading startPos: {leading.startPos.byteIdx}"
|
||||
IO.println s!"first token leading content: {Substring.Raw.toString leading}"
|
||||
| _ => IO.println "no original leading found"
|
||||
Reference in New Issue
Block a user