Compare commits

...

2 Commits

Author SHA1 Message Date
Leonardo de Moura
86b60c5a68 chore: fix test 2025-10-08 13:45:36 -07:00
Leonardo de Moura
556fe87f7e feat: hexnum parser
This PR adds a new helper parser for implementing parsers that contain
hexadecimal numbers. We are going to use it to implement anchors in
the `grind` interactive mode.
2025-10-08 13:33:45 -07:00
11 changed files with 105 additions and 4 deletions

View File

@@ -443,6 +443,10 @@ abbrev NumLit := TSyntax numLitKind
Syntax that represents macro hygiene info.
-/
abbrev HygieneInfo := TSyntax hygieneInfoKind
/--
Syntax that represent a hexadecimal number without the `0x` prefix.
-/
abbrev HexNum := TSyntax hexnumKind
end Syntax
@@ -1196,6 +1200,21 @@ Returns `0` if the syntax is malformed.
def getNat (s : NumLit) : Nat :=
s.raw.isNatLit?.getD 0
private def isHexNum? (stx : Syntax) : Option Nat :=
match Syntax.isLit? hexnumKind stx with
| some val => Syntax.decodeHexLitAux val 0 0
| _ => none
/-- Returns the value of the hexadecimal numeral as a natural number. -/
def getHexNumVal (s : Syntax.HexNum) : Nat :=
isHexNum? s.raw |>.getD 0
/-- Returns the number of hexadecimal digits. -/
def getHexNumSize (s : Syntax.HexNum) : Nat :=
match Syntax.isLit? hexnumKind s.raw with
| some val => val.utf8ByteSize
| _ => 0
/--
Extracts the parsed name from the syntax of an identifier.

View File

@@ -4969,9 +4969,17 @@ abbrev strLitKind : SyntaxNodeKind := `str
/-- `` `char `` is the node kind of character literals like `'A'`. -/
abbrev charLitKind : SyntaxNodeKind := `char
/-- `` `num `` is the node kind of number literals like `42`. -/
/-- `` `num `` is the node kind of number literals like `42` and `0xa1` -/
abbrev numLitKind : SyntaxNodeKind := `num
/--
`` `hexnum `` is the node kind of hexadecimal numbers like `ea10`
without the `0x` prefix. Recall that `hexnum` is not a token and must be prefixed.
For hexadecimal number literals, you should use `num` instead.
Example: `syntax anchor := "#" noWs hexnum`.
-/
abbrev hexnumKind : SyntaxNodeKind := `hexnum
/-- `` `scientific `` is the node kind of floating point literals like `1.23e-3`. -/
abbrev scientificLitKind : SyntaxNodeKind := `scientific

View File

@@ -80,4 +80,11 @@ public partial def getAnchor (e : Expr) : GrindM UInt64 := do
modify fun s => { s with anchors := s.anchors.insert { expr := e } a }
return a
/--
Example: `isAnchorPrefix 4 0x0c88 0x0c88ab10ef20206a` returns `true`
-/
public def isAnchorPrefix (numHexDigits : Nat) (anchorPrefix : UInt64) (anchor : UInt64) : Bool :=
let shift := 64 - numHexDigits.toUInt64*4
anchorPrefix == anchor >>> shift
end Lean.Meta.Grind

View File

@@ -51,6 +51,7 @@ builtin_initialize
register_parser_alias withoutPosition { stackSz? := none }
register_parser_alias withoutForbidden { stackSz? := none }
register_parser_alias (kind := interpolatedStrKind) interpolatedStr
register_parser_alias (kind := hexnumKind) hexnum
register_parser_alias orelse
register_parser_alias andthen { stackSz? := none }
register_parser_alias recover

View File

@@ -893,9 +893,9 @@ def octalNumberFn (startPos : String.Pos.Raw) (includeWhitespace := true) : Pars
private def isHexDigit (c : Char) : Bool :=
('0' ≤ c && c ≤ '9') || ('a' ≤ c && c ≤ 'f') || ('A' ≤ c && c ≤ 'F')
def hexNumberFn (startPos : String.Pos.Raw) (includeWhitespace := true) : ParserFn := fun c s =>
def hexNumberFn (startPos : String.Pos.Raw) (includeWhitespace := true) (kind := numLitKind) : ParserFn := fun c s =>
let s := takeDigitsFn isHexDigit "hexadecimal number" true c s
mkNodeToken numLitKind startPos includeWhitespace c s
mkNodeToken kind startPos includeWhitespace c s
def numberFnAux (includeWhitespace := true) : ParserFn := fun c s =>
let startPos := s.pos
@@ -910,7 +910,7 @@ def numberFnAux (includeWhitespace := true) : ParserFn := fun c s =>
else if curr == 'o' || curr == 'O' then
octalNumberFn startPos includeWhitespace c (s.next c i)
else if curr == 'x' || curr == 'X' then
hexNumberFn startPos includeWhitespace c (s.next c i)
hexNumberFn startPos includeWhitespace numLitKind c (s.next c i)
else
decimalNumberFn startPos includeWhitespace c (s.setPos i)
else if curr.isDigit then
@@ -1258,6 +1258,15 @@ def numLitNoAntiquot : Parser := {
info := mkAtomicInfo "num"
}
/-- Parses an hexadecimal numeral without the `0x` prefix. It is not a literal. -/
def hexnumFn : ParserFn := fun ctx s =>
hexNumberFn s.pos true hexnumKind ctx s
def hexnumNoAntiquot : Parser := {
fn := hexnumFn
info := mkAtomicInfo "hexnum"
}
def scientificLitFn : ParserFn := expectTokenFn scientificLitKind "scientific number"
def scientificLitNoAntiquot : Parser := {

View File

@@ -124,6 +124,17 @@ You can use `TSyntax.getNat` to extract the number from the resulting syntax obj
@[run_builtin_parser_attribute_hooks, builtin_doc] def numLit : Parser :=
withAntiquot (mkAntiquot "num" numLitKind) numLitNoAntiquot
/-- The parser `hexnum` parses a hexadecimal numeric literal not containing the `0x` prefix.
It produces a `hexnumKind` node containing an atom with the text of the
literal. This parser is mainly used for creating atoms such `#<hexnum>`. Recall that `hexnum`
is not a token and this parser must be prefixed by another parser.
For numerals such as `0xadef100a`, you should use `numLit`.
-/
@[builtin_doc] def hexnum : Parser :=
withAntiquot (mkAntiquot "hexnum" hexnumKind) hexnumNoAntiquot
/-- The parser `scientific` parses a scientific-notation literal, such as `1.3e-24`.
This parser has arity 1: it produces a `scientificLitKind` node containing an atom with the text

View File

@@ -506,6 +506,7 @@ def visitAtom (k : SyntaxNodeKind) : Formatter := do
@[combinator_formatter strLitNoAntiquot, expose] def strLitNoAntiquot.formatter := visitAtom strLitKind
@[combinator_formatter nameLitNoAntiquot, expose] def nameLitNoAntiquot.formatter := visitAtom nameLitKind
@[combinator_formatter numLitNoAntiquot, expose] def numLitNoAntiquot.formatter := visitAtom numLitKind
@[combinator_formatter scientificLitNoAntiquot, expose] def scientificLitNoAntiquot.formatter := visitAtom scientificLitKind
@[combinator_formatter fieldIdx, expose] def fieldIdx.formatter := visitAtom fieldIdxKind
@@ -570,6 +571,8 @@ def interpolatedStr.formatter (p : Formatter) : Formatter := do
| some str => push str *> goLeft
| none => p
@[combinator_formatter hexnumNoAntiquot, expose] def hexnum.formatter := visitAtom hexnumKind
@[combinator_formatter _root_.ite, expose, macro_inline] def ite {_ : Type} (c : Prop) [Decidable c] (t e : Formatter) : Formatter :=
if c then t else e

View File

@@ -563,6 +563,8 @@ def interpolatedStr.parenthesizer (p : Parenthesizer) : Parenthesizer := do
else
p
@[combinator_parenthesizer hexnumNoAntiquot, expose] def hexnum.parenthesizer := visitToken
@[combinator_parenthesizer _root_.ite, expose, macro_inline] def ite {_ : Type} (c : Prop) [Decidable c] (t e : Parenthesizer) : Parenthesizer :=
if c then t else e

View File

@@ -1,3 +1,4 @@
// update me!
#include "util/options.h"
namespace lean {

View File

@@ -15,12 +15,20 @@
{"label": "getName",
"kind": 3,
"data": ["«external:file:///1265.lean»", 0, 51, 1, "cLean.TSyntax.getName"]},
{"label": "getHexNumVal",
"kind": 3,
"data":
["«external:file:///1265.lean»", 0, 51, 1, "cLean.TSyntax.getHexNumVal"]},
{"label": "getChar",
"kind": 3,
"data": ["«external:file:///1265.lean»", 0, 51, 1, "cLean.TSyntax.getChar"]},
{"label": "raw",
"kind": 5,
"data": ["«external:file:///1265.lean»", 0, 51, 1, "cLean.TSyntax.raw"]},
{"label": "getHexNumSize",
"kind": 3,
"data":
["«external:file:///1265.lean»", 0, 51, 1, "cLean.TSyntax.getHexNumSize"]},
{"label": "getDocString",
"kind": 3,
"data":
@@ -58,12 +66,20 @@
{"label": "getName",
"kind": 3,
"data": ["«external:file:///1265.lean»", 2, 53, 1, "cLean.TSyntax.getName"]},
{"label": "getHexNumVal",
"kind": 3,
"data":
["«external:file:///1265.lean»", 2, 53, 1, "cLean.TSyntax.getHexNumVal"]},
{"label": "getChar",
"kind": 3,
"data": ["«external:file:///1265.lean»", 2, 53, 1, "cLean.TSyntax.getChar"]},
{"label": "raw",
"kind": 5,
"data": ["«external:file:///1265.lean»", 2, 53, 1, "cLean.TSyntax.raw"]},
{"label": "getHexNumSize",
"kind": 3,
"data":
["«external:file:///1265.lean»", 2, 53, 1, "cLean.TSyntax.getHexNumSize"]},
{"label": "getDocString",
"kind": 3,
"data":

View File

@@ -0,0 +1,24 @@
syntax "#" noWs hexnum : term
open Lean in
macro_rules
| `(#$n:hexnum) => `(($(quote n.getHexNumSize), $(quote n.getHexNumVal)))
/-- info: (4, 44733) : Nat × Nat -/
#guard_msgs in
#check #aebd
/-- info: (1, 10) : Nat × Nat -/
#guard_msgs in
#check #a
/-- info: (2, 16) : Nat × Nat -/
#guard_msgs in
#check #10
/-- info: (2, 5) : Nat × Nat -/
#guard_msgs in
#check #05
/-- info: (3, 10) : Nat × Nat -/
#guard_msgs in
#check #00a