Compare commits

...

4 Commits

Author SHA1 Message Date
Kim Morrison
1c2e5c6095 unused variable 2025-12-08 14:07:14 +11:00
Kim Morrison
8ff10de21a fix: mark unused variable with underscore
The `lastCharWasDigit` parameter in the fold lambda was not used in the
function body, only tracked in the result tuple. Mark it as unused with
an underscore prefix to satisfy the linter.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-08 13:09:17 +11:00
Kim Morrison
dfa53065a4 fix: use {lit} role for underscore in documentation 2025-12-08 10:09:02 +11:00
Kim Morrison
776680ee90 feat: support underscores in String.toNat? and String.toInt?
This PR adds support for underscores as digit separators in String.toNat?, String.toInt?, and related parsing functions. This makes the string parsing functions consistent with Lean's numeric literal syntax, which already supports underscores for readability (e.g., 100_000_000).

The implementation validates that underscores:
- Cannot appear at the start or end of the number
- Cannot appear consecutively
- Are ignored when calculating the numeric value

This resolves a common source of friction when parsing user input from command-line arguments, environment variables, or configuration files, where users naturally expect to use the same numeric syntax they use in source code.

Closes #11538

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-08 09:52:51 +11:00
3 changed files with 134 additions and 10 deletions

View File

@@ -1249,7 +1249,8 @@ def foldr {α : Type u} (f : Char → αα) (init : α) (s : Slice) : α :=
Checks whether the slice can be interpreted as the decimal representation of a natural number.
A slice can be interpreted as a decimal natural number if it is not empty and all the characters in
it are digits.
it are digits. Underscores ({lit}`_`) are allowed as digit separators for readability, but cannot appear
at the start, at the end, or consecutively.
Use {name (scope := "Init.Data.String.Slice")}`toNat?` or
{name (scope := "Init.Data.String.Slice")}`toNat!` to convert such a slice to a natural number.
@@ -1260,21 +1261,39 @@ Examples:
* {lean}`"5".toSlice.isNat = true`
* {lean}`"05".toSlice.isNat = true`
* {lean}`"587".toSlice.isNat = true`
* {lean}`"1_000".toSlice.isNat = true`
* {lean}`"100_000_000".toSlice.isNat = true`
* {lean}`"-587".toSlice.isNat = false`
* {lean}`" 5".toSlice.isNat = false`
* {lean}`"2+3".toSlice.isNat = false`
* {lean}`"0xff".toSlice.isNat = false`
* {lean}`"_123".toSlice.isNat = false`
* {lean}`"123_".toSlice.isNat = false`
* {lean}`"12__34".toSlice.isNat = false`
-/
@[inline]
def isNat (s : Slice) : Bool :=
!s.isEmpty && s.all Char.isDigit
if s.isEmpty then
false
else
-- Track: isFirst, lastWasUnderscore, lastCharWasDigit, valid
let result := s.foldl (fun (isFirst, lastWasUnderscore, _lastCharWasDigit, valid) c =>
let isDigit := c.isDigit
let isUnderscore := c = '_'
let newValid := valid && (isDigit || isUnderscore) &&
!(isFirst && isUnderscore) && -- Cannot start with underscore
!(lastWasUnderscore && isUnderscore) -- No consecutive underscores
(false, isUnderscore, isDigit, newValid))
(true, false, false, true)
-- Must be valid and last character must have been a digit (not underscore)
result.2.2.2 && result.2.2.1
/--
Interprets a slice as the decimal representation of a natural number, returning it. Returns
{name}`none` if the slice does not contain a decimal natural number.
A slice can be interpreted as a decimal natural number if it is not empty and all the characters in
it are digits.
it are digits. Underscores ({lit}`_`) are allowed as digit separators and are ignored during parsing.
Use {name}`isNat` to check whether {name}`toNat?` would return {name}`some`.
{name (scope := "Init.Data.String.Slice")}`toNat!` is an alternative that panics instead of
@@ -1285,6 +1304,8 @@ Examples:
* {lean}`"0".toSlice.toNat? = some 0`
* {lean}`"5".toSlice.toNat? = some 5`
* {lean}`"587".toSlice.toNat? = some 587`
* {lean}`"1_000".toSlice.toNat? = some 1000`
* {lean}`"100_000_000".toSlice.toNat? = some 100000000`
* {lean}`"-587".toSlice.toNat? = none`
* {lean}`" 5".toSlice.toNat? = none`
* {lean}`"2+3".toSlice.toNat? = none`
@@ -1292,7 +1313,7 @@ Examples:
-/
def toNat? (s : Slice) : Option Nat :=
if s.isNat then
some <| s.foldl (fun n c => n * 10 + (c.toNat - '0'.toNat)) 0
some <| s.foldl (fun n c => if c = '_' then n else n * 10 + (c.toNat - '0'.toNat)) 0
else
none
@@ -1301,7 +1322,7 @@ Interprets a slice as the decimal representation of a natural number, returning
slice does not contain a decimal natural number.
A slice can be interpreted as a decimal natural number if it is not empty and all the characters in
it are digits.
it are digits. Underscores ({lit}`_`) are allowed as digit separators and are ignored during parsing.
Use {name}`isNat` to check whether {name}`toNat!` would return a value. {name}`toNat?` is a safer
alternative that returns {name}`none` instead of panicking when the string is not a natural number.
@@ -1310,10 +1331,11 @@ Examples:
* {lean}`"0".toSlice.toNat! = 0`
* {lean}`"5".toSlice.toNat! = 5`
* {lean}`"587".toSlice.toNat! = 587`
* {lean}`"1_000".toSlice.toNat! = 1000`
-/
def toNat! (s : Slice) : Nat :=
if s.isNat then
s.foldl (fun n c => n * 10 + (c.toNat - '0'.toNat)) 0
s.foldl (fun n c => if c = '_' then n else n * 10 + (c.toNat - '0'.toNat)) 0
else
panic! "Nat expected"

View File

@@ -403,25 +403,39 @@ Examples:
Checks whether the substring can be interpreted as the decimal representation of a natural number.
A substring can be interpreted as a decimal natural number if it is not empty and all the characters
in it are digits.
in it are digits. Underscores ({lit}`_`) are allowed as digit separators for readability, but cannot appear
at the start, at the end, or consecutively.
Use `Substring.toNat?` to convert such a substring to a natural number.
-/
@[inline] def isNat (s : Substring.Raw) : Bool :=
!s.isEmpty && s.all fun c => c.isDigit
if s.isEmpty then
false
else
-- Track: isFirst, lastWasUnderscore, lastCharWasDigit, valid
let result := s.foldl (fun (isFirst, lastWasUnderscore, _lastCharWasDigit, valid) c =>
let isDigit := c.isDigit
let isUnderscore := c = '_'
let newValid := valid && (isDigit || isUnderscore) &&
!(isFirst && isUnderscore) && -- Cannot start with underscore
!(lastWasUnderscore && isUnderscore) -- No consecutive underscores
(false, isUnderscore, isDigit, newValid))
(true, false, false, true)
-- Must be valid and last character must have been a digit (not underscore)
result.2.2.2 && result.2.2.1
/--
Checks whether the substring can be interpreted as the decimal representation of a natural number,
returning the number if it can.
A substring can be interpreted as a decimal natural number if it is not empty and all the characters
in it are digits.
in it are digits. Underscores ({lit}`_`) are allowed as digit separators and are ignored during parsing.
Use `Substring.isNat` to check whether the substring is such a substring.
-/
def toNat? (s : Substring.Raw) : Option Nat :=
if s.isNat then
some <| s.foldl (fun n c => n*10 + (c.toNat - '0'.toNat)) 0
some <| s.foldl (fun n c => if c = '_' then n else n*10 + (c.toNat - '0'.toNat)) 0
else
none

View File

@@ -0,0 +1,88 @@
/-!
Tests for `String.toNat?` and `String.toInt?` with underscore support.
This ensures that numeric parsing functions accept the same underscore digit separators
that are allowed in Lean numeric literals, for consistency.
See: https://github.com/leanprover/lean4/issues/11538
-/
-- Basic underscore tests for String.Slice.isNat
#guard "1_000".toSlice.isNat = true
#guard "100_000".toSlice.isNat = true
#guard "1_000_000".toSlice.isNat = true
#guard "100_000_000".toSlice.isNat = true
-- Edge cases for isNat
#guard "_123".toSlice.isNat = false -- Cannot start with underscore
#guard "123_".toSlice.isNat = false -- Cannot end with underscore
#guard "12__34".toSlice.isNat = false -- Cannot have consecutive underscores
#guard "1_2_3".toSlice.isNat = true -- Single underscores are fine
-- toNat? basic tests
#guard "1_000".toSlice.toNat? = some 1000
#guard "100_000".toSlice.toNat? = some 100000
#guard "1_000_000".toSlice.toNat? = some 1000000
#guard "100_000_000".toSlice.toNat? = some 100000000
-- More complex patterns
#guard "1_2_3_4_5".toSlice.toNat? = some 12345
#guard "9_99".toSlice.toNat? = some 999
#guard "5_0_0_0".toSlice.toNat? = some 5000
-- Edge cases that should fail
#guard "_123".toSlice.toNat? = none
#guard "123_".toSlice.toNat? = none
#guard "12__34".toSlice.toNat? = none
#guard "12_3_".toSlice.toNat? = none
#guard "_".toSlice.toNat? = none
-- Verify numeric values are correct
#guard "1_234".toSlice.toNat? = "1234".toSlice.toNat?
#guard "987_654_321".toSlice.toNat? = "987654321".toSlice.toNat?
-- Tests for String.toNat? (not just Slice)
#guard "1_000".toNat? = some 1000
#guard "100_000".toNat? = some 100000
#guard "1_000_000".toNat? = some 1000000
#guard "_123".toNat? = none
#guard "123_".toNat? = none
-- Tests for String.Slice.isInt
#guard "-1_000".toSlice.isInt = true
#guard "-100_000".toSlice.isInt = true
#guard "1_000".toSlice.isInt = true -- Positive numbers are also ints
-- Edge cases for isInt
#guard "-_123".toSlice.isInt = false -- Cannot have underscore right after minus
#guard "-123_".toSlice.isInt = false -- Cannot end with underscore
#guard "-12__34".toSlice.isInt = false -- Cannot have consecutive underscores
-- toInt? basic tests
#guard "-1_000".toSlice.toInt? = some (-1000)
#guard "-100_000".toSlice.toInt? = some (-100000)
#guard "-1_000_000".toSlice.toInt? = some (-1000000)
#guard "1_000".toSlice.toInt? = some 1000
#guard "100_000".toSlice.toInt? = some 100000
-- Edge cases for toInt?
#guard "-_123".toSlice.toInt? = none
#guard "-123_".toSlice.toInt? = none
#guard "-12__34".toSlice.toInt? = none
-- Verify numeric values are correct
#guard "-1_234".toSlice.toInt? = "-1234".toSlice.toInt?
#guard "987_654_321".toSlice.toInt? = "987654321".toSlice.toInt?
-- Tests for String.toInt? (not just Slice)
#guard "-1_000".toInt? = some (-1000)
#guard "1_000".toInt? = some 1000
#guard "-_123".toInt? = none
#guard "-123_".toInt? = none
-- Compatibility with existing behavior (no underscores)
#guard "0".toNat? = some 0
#guard "123".toNat? = some 123
#guard "".toNat? = none
#guard "-456".toInt? = some (-456)
#guard "789".toInt? = some 789