mirror of
https://github.com/leanprover/lean4.git
synced 2026-03-17 18:34:06 +00:00
perf: optimize string literal equality simprocs for kernel efficiency
This PR optimizes the `String.reduceEq`, `String.reduceNe`, and `Sym.Simp` string equality simprocs to produce kernel-efficient proofs. Previously, these used `String.decEq` which forced the kernel to run UTF-8 encoding/decoding and byte array comparison, causing 86+ kernel unfoldings on short strings. The new approach uses `String.ofList_injective` combined with `congrArg (List.get?Internal · i)` at the first differing character position, reducing kernel work to O(first_diff_pos) list indexing plus a single character comparison. For equal strings, `eq_true rfl` avoids kernel evaluation entirely. The shared proof construction is in `Lean.Meta.mkStringLitNeProof`, used by both the standard simprocs and the `Sym.Simp` ground evaluator. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
67
src/Lean/Meta/StringLitProof.lean
Normal file
67
src/Lean/Meta/StringLitProof.lean
Normal file
@@ -0,0 +1,67 @@
|
||||
/-
|
||||
Copyright (c) 2026 Lean FRO, LLC. All rights reserved.
|
||||
Released under Apache 2.0 license as described in the file LICENSE.
|
||||
Authors: Joachim Breitner
|
||||
-/
|
||||
module
|
||||
|
||||
prelude
|
||||
public import Lean.Meta.AppBuilder
|
||||
|
||||
public section
|
||||
|
||||
namespace Lean.Meta
|
||||
|
||||
/--
|
||||
Builds a proof of `String.ofList l₁ ≠ String.ofList l₂` for two distinct string literals.
|
||||
|
||||
The proof avoids both `String.decEq` (expensive UTF-8 byte array comparison) and full
|
||||
`List Char` comparison. Instead, it finds the first differing character position `i` and proves
|
||||
inequality via `congrArg (List.get?Internal · i)`, reducing the kernel work to O(i) list
|
||||
indexing plus a single character comparison. The full proof term is:
|
||||
|
||||
```
|
||||
mt String.ofList_injective (mt (congrArg (List.get?Internal · i)) (of_decide_eq_true rfl))
|
||||
```
|
||||
|
||||
where `rfl` proves `decide (l₁[i]? ≠ l₂[i]?) = true` on just two `Option Char` values.
|
||||
-/
|
||||
def mkStringLitNeProof (s₁ s₂ : String) : MetaM Expr := do
|
||||
let l₁ := s₁.toList
|
||||
let l₂ := s₂.toList
|
||||
let l₁Expr := toExpr l₁
|
||||
let l₂Expr := toExpr l₂
|
||||
-- Find the first differing character index
|
||||
let diffIdx := Id.run do
|
||||
let minLen := min l₁.length l₂.length
|
||||
for i in [:minLen] do
|
||||
if l₁[i]! ≠ l₂[i]! then return i
|
||||
return minLen
|
||||
let charConst := mkConst ``Char
|
||||
let listCharTy := mkApp (mkConst ``List [.zero]) charConst
|
||||
let optCharTy := mkApp (mkConst ``Option [.zero]) charConst
|
||||
let iExpr := toExpr diffIdx
|
||||
-- Build f = fun l => List.get?Internal l diffIdx
|
||||
let getFn := mkLambda `l .default listCharTy
|
||||
(mkApp3 (mkConst ``List.get?Internal [.zero]) charConst (mkBVar 0) iExpr)
|
||||
-- congrArg f (partially applied): l₁ = l₂ → l₁[i]? = l₂[i]?
|
||||
let congrArgFn := mkApp5 (mkConst ``congrArg [.succ .zero, .succ .zero])
|
||||
listCharTy optCharTy l₁Expr l₂Expr getFn
|
||||
-- Prove l₁[i]? ≠ l₂[i]? via decide on Option Char (trivial: one character comparison)
|
||||
let getL1 := mkApp3 (mkConst ``List.get?Internal [.zero]) charConst l₁Expr iExpr
|
||||
let getL2 := mkApp3 (mkConst ``List.get?Internal [.zero]) charConst l₂Expr iExpr
|
||||
let getEq := mkApp3 (mkConst ``Eq [.succ .zero]) optCharTy getL1 getL2
|
||||
let getNe := mkApp (mkConst ``Not) getEq
|
||||
let d ← mkDecide getNe
|
||||
let getNeProof := mkApp3 (mkConst ``of_decide_eq_true) getNe d.appArg! (← mkEqRefl (mkConst ``true))
|
||||
-- listNeProof : l₁ ≠ l₂ via mt (congrArg f) getNeProof
|
||||
let listEq := mkApp3 (mkConst ``Eq [.succ .zero]) listCharTy l₁Expr l₂Expr
|
||||
let listNeProof := mkApp4 (mkConst ``mt) listEq getEq congrArgFn getNeProof
|
||||
-- strNeProof : String.ofList l₁ ≠ String.ofList l₂ via mt ofList_injective listNeProof
|
||||
let strType := mkConst ``String
|
||||
let strEq := mkApp3 (mkConst ``Eq [.succ .zero]) strType
|
||||
(mkApp (mkConst ``String.ofList) l₁Expr) (mkApp (mkConst ``String.ofList) l₂Expr)
|
||||
return mkApp4 (mkConst ``mt) strEq listEq
|
||||
(mkApp2 (mkConst ``String.ofList_injective) l₁Expr l₂Expr) listNeProof
|
||||
|
||||
end Lean.Meta
|
||||
@@ -8,6 +8,7 @@ prelude
|
||||
public import Lean.Meta.Sym.Simp.SimpM
|
||||
import Init.Sym.Lemmas
|
||||
import Lean.Meta.Sym.LitValues
|
||||
import Lean.Meta.StringLitProof
|
||||
namespace Lean.Meta.Sym.Simp
|
||||
|
||||
/-!
|
||||
@@ -376,6 +377,25 @@ def evalFinPred (n : Expr) (trueThm falseThm : Expr) (op : {n : Nat} → Fin n
|
||||
|
||||
open Lean.Sym
|
||||
|
||||
/--
|
||||
Evaluates `@Eq String a b` for string literal arguments, producing kernel-efficient proofs.
|
||||
When equal, uses `eq_self` (no kernel evaluation needed). When different, uses
|
||||
`mkStringLitNeProof` which finds the first differing character position and proves
|
||||
inequality via `congrArg (List.get?Internal · i)`.
|
||||
-/
|
||||
private def evalStringEq (a b : Expr) : SimpM Result := do
|
||||
let some va := getStringValue? a | return .rfl
|
||||
let some vb := getStringValue? b | return .rfl
|
||||
if va = vb then
|
||||
let e ← getTrueExpr
|
||||
return .step e (mkApp2 (mkConst ``eq_self [.succ .zero]) (mkConst ``String) a) (done := true)
|
||||
else
|
||||
let neProof ← mkStringLitNeProof va vb
|
||||
let proof := mkApp2 (mkConst ``eq_false)
|
||||
(mkApp3 (mkConst ``Eq [.succ .zero]) (mkConst ``String) a b) neProof
|
||||
let e ← getFalseExpr
|
||||
return .step e proof (done := true)
|
||||
|
||||
def evalLT (α : Expr) (a b : Expr) : SimpM Result :=
|
||||
match_expr α with
|
||||
| Nat => evalBinPred getNatValue? (mkConst ``Nat.lt_eq_true) (mkConst ``Nat.lt_eq_false) (. < .) a b
|
||||
@@ -434,7 +454,7 @@ def evalEq (α : Expr) (a b : Expr) : SimpM Result :=
|
||||
| Fin n => evalFinPred n (mkConst ``Fin.eq_eq_true) (mkConst ``Fin.eq_eq_false) (. = .) a b
|
||||
| BitVec n => evalBitVecPred n (mkConst ``BitVec.eq_eq_true) (mkConst ``BitVec.eq_eq_false) (. = .) a b
|
||||
| Char => evalBinPred getCharValue? (mkConst ``Char.eq_eq_true) (mkConst ``Char.eq_eq_false) (. = .) a b
|
||||
| String => evalBinPred getStringValue? (mkConst ``String.eq_eq_true) (mkConst ``String.eq_eq_false) (. = .) a b
|
||||
| String => evalStringEq a b
|
||||
| _ => return .rfl
|
||||
|
||||
def evalDvd (α : Expr) (a b : Expr) : SimpM Result :=
|
||||
|
||||
@@ -7,6 +7,7 @@ module
|
||||
|
||||
prelude
|
||||
public import Lean.Meta.Tactic.Simp.BuiltinSimprocs.Char
|
||||
import Lean.Meta.StringLitProof
|
||||
|
||||
public section
|
||||
|
||||
@@ -57,6 +58,7 @@ builtin_dsimproc [simp, seval] reduceSingleton (String.singleton _) := fun e =>
|
||||
let some m ← fromExpr? e.appArg! | return .continue
|
||||
evalPropStep e (op n m)
|
||||
|
||||
|
||||
@[inline] def reduceBoolPred (declName : Name) (arity : Nat) (op : String → String → Bool) (e : Expr) : SimpM DStep := do
|
||||
unless e.isAppOfArity declName arity do return .continue
|
||||
let some n ← fromExpr? e.appFn!.appArg! | return .continue
|
||||
@@ -67,8 +69,18 @@ builtin_simproc [simp, seval] reduceLT (( _ : String) < _) := reduceBinPred ``
|
||||
builtin_simproc [simp, seval] reduceLE (( _ : String) ≤ _) := reduceBinPred ``LE.le 4 (. ≤ .)
|
||||
builtin_simproc [simp, seval] reduceGT (( _ : String) > _) := reduceBinPred ``GT.gt 4 (. > .)
|
||||
builtin_simproc [simp, seval] reduceGE (( _ : String) ≥ _) := reduceBinPred ``GE.ge 4 (. ≥ .)
|
||||
builtin_simproc [simp, seval] reduceEq (( _ : String) = _) := reduceBinPred ``Eq 3 (. = .)
|
||||
builtin_simproc [simp, seval] reduceNe (( _ : String) ≠ _) := reduceBinPred ``Ne 3 (. ≠ .)
|
||||
|
||||
builtin_simproc [simp, seval] reduceEq (( _ : String) = _) := fun e => do
|
||||
unless e.isAppOfArity ``Eq 3 do return .continue
|
||||
let some n ← fromExpr? e.appFn!.appArg! | return .continue
|
||||
let some m ← fromExpr? e.appArg! | return .continue
|
||||
evalEqPropStep e (n = m) (mkStringLitNeProof n m)
|
||||
|
||||
builtin_simproc [simp, seval] reduceNe (( _ : String) ≠ _) := fun e => do
|
||||
unless e.isAppOfArity ``Ne 3 do return .continue
|
||||
let some n ← fromExpr? e.appFn!.appArg! | return .continue
|
||||
let some m ← fromExpr? e.appArg! | return .continue
|
||||
evalNePropStep e (n ≠ m) (mkStringLitNeProof n m)
|
||||
builtin_dsimproc [simp, seval] reduceBEq (( _ : String) == _) := reduceBoolPred ``BEq.beq 4 (. == .)
|
||||
builtin_dsimproc [simp, seval] reduceBNe (( _ : String) != _) := reduceBoolPred ``bne 4 (. != .)
|
||||
|
||||
|
||||
@@ -24,4 +24,40 @@ def evalPropStep (p : Expr) (result : Bool) : SimpM Step := do
|
||||
else
|
||||
return .done { expr := mkConst ``False, proof? := mkAppN (mkConst ``eq_false_of_decide) #[p, d.appArg!, (← mkEqRefl (mkConst ``false))] }
|
||||
|
||||
/--
|
||||
Like `evalPropStep`, but specialized for `@Eq.{u} α a b`. When the values are equal, uses
|
||||
`eq_true rfl` which requires no kernel evaluation. When different, calls `mkNeProof` to build
|
||||
a proof of `a ≠ b`.
|
||||
-/
|
||||
def evalEqPropStep (e : Expr) (eq : Bool) (mkNeProof : SimpM Expr) : SimpM Step := do
|
||||
let α := e.appFn!.appFn!.appArg!
|
||||
let a := e.appFn!.appArg!
|
||||
let u := e.appFn!.appFn!.appFn!.constLevels!.head!
|
||||
if eq then
|
||||
let proof := mkApp2 (mkConst ``eq_true) e (mkApp2 (mkConst ``Eq.refl [u]) α a)
|
||||
return .done { expr := mkConst ``True, proof? := proof }
|
||||
else
|
||||
let neProof ← mkNeProof
|
||||
let proof := mkApp2 (mkConst ``eq_false) e neProof
|
||||
return .done { expr := mkConst ``False, proof? := proof }
|
||||
|
||||
/--
|
||||
Like `evalPropStep`, but specialized for `@Ne.{u} α a b`. When the values differ, calls
|
||||
`mkNeProof` to build a proof of `a ≠ b`. When equal, uses `eq_false (not_not_intro rfl)`
|
||||
which requires no kernel evaluation.
|
||||
-/
|
||||
def evalNePropStep (e : Expr) (ne : Bool) (mkNeProof : SimpM Expr) : SimpM Step := do
|
||||
if ne then
|
||||
let neProof ← mkNeProof
|
||||
let proof := mkApp2 (mkConst ``eq_true) e neProof
|
||||
return .done { expr := mkConst ``True, proof? := proof }
|
||||
else
|
||||
let α := e.appFn!.appFn!.appArg!
|
||||
let a := e.appFn!.appArg!
|
||||
let u := e.appFn!.appFn!.appFn!.constLevels!.head!
|
||||
let eqProp := mkApp3 (mkConst ``Eq [u]) α a a
|
||||
let rflExpr := mkApp2 (mkConst ``Eq.refl [u]) α a
|
||||
let proof := mkApp2 (mkConst ``eq_false) e (mkApp2 (mkConst ``not_not_intro) eqProp rflExpr)
|
||||
return .done { expr := mkConst ``False, proof? := proof }
|
||||
|
||||
end Lean.Meta.Simp
|
||||
|
||||
65
tests/elab/string_neq_kernel_cost.lean
Normal file
65
tests/elab/string_neq_kernel_cost.lean
Normal file
@@ -0,0 +1,65 @@
|
||||
/-!
|
||||
Tests that string equality/inequality simprocs produce kernel-efficient proofs.
|
||||
|
||||
Previously, `String.reduceNe` and `String.reduceEq` used `decide` with `String.decEq`, which
|
||||
forced the kernel to run UTF-8 encoding/decoding and byte array comparison. This caused a high
|
||||
number of kernel unfoldings (e.g. 86 for `List.rec` on short strings).
|
||||
|
||||
The fix uses `String.ofList_injective` to reduce to `List Char` comparison, which is much
|
||||
cheaper in the kernel since string literals are `String.ofList [chars]` at the kernel level.
|
||||
-/
|
||||
|
||||
-- String.reduceNe: different strings (the primary use case)
|
||||
-- Kernel should NOT unfold ByteArray/utf8 functions
|
||||
set_option diagnostics true in
|
||||
set_option diagnostics.threshold 17 in
|
||||
/-- -/
|
||||
#guard_msgs in
|
||||
example : "hello" ≠ "foo" := by simp
|
||||
|
||||
-- String.reduceNe: equal strings
|
||||
set_option diagnostics true in
|
||||
set_option diagnostics.threshold 17 in
|
||||
/-- -/
|
||||
#guard_msgs in
|
||||
example : "hello" ≠ "hello" ↔ False := by simp
|
||||
|
||||
-- String.reduceEq: equal strings
|
||||
set_option diagnostics true in
|
||||
set_option diagnostics.threshold 17 in
|
||||
/-- -/
|
||||
#guard_msgs in
|
||||
example : "hello" = "hello" := by simp
|
||||
|
||||
-- String.reduceEq: different strings
|
||||
set_option diagnostics true in
|
||||
set_option diagnostics.threshold 17 in
|
||||
/-- -/
|
||||
#guard_msgs in
|
||||
example : "hello" = "foo" ↔ False := by simp
|
||||
|
||||
-- Corner case: empty string vs nonempty
|
||||
set_option diagnostics true in
|
||||
set_option diagnostics.threshold 17 in
|
||||
/-- -/
|
||||
#guard_msgs in
|
||||
example : "" ≠ "a" := by simp
|
||||
|
||||
set_option diagnostics true in
|
||||
set_option diagnostics.threshold 17 in
|
||||
/-- -/
|
||||
#guard_msgs in
|
||||
example : "a" ≠ "" := by simp
|
||||
|
||||
-- Corner case: one string is a prefix of the other
|
||||
set_option diagnostics true in
|
||||
set_option diagnostics.threshold 17 in
|
||||
/-- -/
|
||||
#guard_msgs in
|
||||
example : "foo" ≠ "foobar" := by simp
|
||||
|
||||
set_option diagnostics true in
|
||||
set_option diagnostics.threshold 17 in
|
||||
/-- -/
|
||||
#guard_msgs in
|
||||
example : "foobar" ≠ "foo" := by simp
|
||||
Reference in New Issue
Block a user