From c2d4079193fa62cbd177059d9022c7a7ae097c42 Mon Sep 17 00:00:00 2001 From: Joachim Breitner Date: Sat, 14 Mar 2026 11:30:31 +0100 Subject: [PATCH] perf: optimize string literal equality simprocs for kernel efficiency (#12887) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR optimizes the `String.reduceEq`, `String.reduceNe`, and `Sym.Simp` string equality simprocs to produce kernel-efficient proofs. Previously, these used `String.decEq` which forced the kernel to run UTF-8 encoding/decoding and byte array comparison, causing 86+ kernel unfoldings on short strings. The new approach reduces string inequality to `List Char` via `String.ofList_injective`, then uses two strategies depending on the difference: - **Different characters at position `i`**: Projects to `Nat` via `congrArg (fun l => (List.get!Internal l i).toNat)`, then uses `Nat.ne_of_beq_eq_false rfl`. This avoids `Decidable` instances entirely — the kernel only evaluates `Nat.beq` on two concrete natural numbers. - **One string is a prefix of the other**: Uses `congrArg (List.drop n ·)` with `List.cons_ne_nil`, which is a definitional proof requiring no `decide` step at all. For equal strings, `eq_true rfl` avoids kernel evaluation entirely. The shared proof construction is in `Lean.Meta.mkStringLitNeProof` (`Lean/Meta/StringLitProof.lean`), used by both the standard simprocs and the `Sym.Simp` ground evaluator. Kernel max unfolds for `"hello" ≠ "foo"`: 86+ → 6. --------- Co-authored-by: Claude Opus 4.6 --- src/Lean/Meta/StringLitProof.lean | 111 ++++++++++++++++++ src/Lean/Meta/Sym/Simp/EvalGround.lean | 22 +++- .../Tactic/Simp/BuiltinSimprocs/String.lean | 16 ++- .../Tactic/Simp/BuiltinSimprocs/Util.lean | 36 ++++++ tests/elab/string_neq_kernel_cost.lean | 65 ++++++++++ 5 files changed, 247 insertions(+), 3 deletions(-) create mode 100644 src/Lean/Meta/StringLitProof.lean create mode 100644 tests/elab/string_neq_kernel_cost.lean diff --git a/src/Lean/Meta/StringLitProof.lean b/src/Lean/Meta/StringLitProof.lean new file mode 100644 index 0000000000..3de04846d2 --- /dev/null +++ b/src/Lean/Meta/StringLitProof.lean @@ -0,0 +1,111 @@ +/- +Copyright (c) 2026 Lean FRO, LLC. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Joachim Breitner +-/ +module + +prelude +public import Lean.Meta.AppBuilder + +public section + +namespace Lean.Meta + +/-- +Builds a proof of `String.ofList l₁ ≠ String.ofList l₂` for two distinct string literals. + +The proof avoids both `String.decEq` (expensive UTF-8 byte array comparison) and full +`List Char` comparison. It reduces to `List Char` via `String.ofList_injective`, then +proves the lists differ using one of two strategies depending on whether the strings +share a common prefix of the shorter string's full length: + +- **Different characters at position `i`**: Uses + `congrArg (fun l => (List.get!Internal l i).toNat)` to project down to `Nat`, then + `Nat.ne_of_beq_eq_false rfl` for a single `Nat.beq` comparison. This avoids `Decidable` + instances entirely — the kernel only evaluates `Nat.beq` on two concrete natural numbers. + +- **One string is a prefix of the other**: Uses `congrArg (List.drop n ·)` where `n` is the + shorter string's length, then `List.cons_ne_nil` to prove the non-empty tail differs from `[]`. + This avoids `decide` entirely since `cons_ne_nil` is a definitional proof. +-/ +def mkStringLitNeProof (s₁ s₂ : String) : MetaM Expr := do + let l₁ := s₁.toList + let l₂ := s₂.toList + let l₁Expr := toExpr l₁ + let l₂Expr := toExpr l₂ + let charConst := mkConst ``Char + let listCharTy := mkApp (mkConst ``List [.zero]) charConst + -- Find the first differing character index + let minLen := min l₁.length l₂.length + let diffIdx := Id.run do + for i in [:minLen] do + if l₁[i]! ≠ l₂[i]! then return i + return minLen + -- Build the list inequality proof, dispatching on whether it's a character + -- difference or a prefix/length difference + let listEq := mkApp3 (mkConst ``Eq [.succ .zero]) listCharTy l₁Expr l₂Expr + let listNeProof ← + if diffIdx < minLen then + -- Both lists have a character at diffIdx: project to Nat via get!Internal + toNat, + -- then use Nat.ne_of_beq_eq_false rfl (avoids Decidable instances entirely) + let inhabCharExpr := mkConst ``Char.instInhabited + let natConst := mkConst ``Nat + let iExpr := toExpr diffIdx + -- f = fun l => (List.get!Internal l i).toNat + let projFn := mkLambda `l .default listCharTy + (mkApp (mkConst ``Char.toNat) + (mkApp4 (mkConst ``List.get!Internal [.zero]) charConst inhabCharExpr (mkBVar 0) iExpr)) + let mkGetToNat (lExpr : Expr) : Expr := + mkApp (mkConst ``Char.toNat) + (mkApp4 (mkConst ``List.get!Internal [.zero]) charConst inhabCharExpr lExpr iExpr) + let projL1 := mkGetToNat l₁Expr + let projL2 := mkGetToNat l₂Expr + let projEq := mkApp3 (mkConst ``Eq [.succ .zero]) natConst projL1 projL2 + let congrArgFn := mkApp5 (mkConst ``congrArg [.succ .zero, .succ .zero]) + listCharTy natConst l₁Expr l₂Expr projFn + -- Nat.ne_of_beq_eq_false rfl : n₁ ≠ n₂ (kernel evaluates Nat.beq on two literals) + let projNeProof := mkApp3 (mkConst ``Nat.ne_of_beq_eq_false) + projL1 projL2 (← mkEqRefl (mkConst ``false)) + pure <| mkApp4 (mkConst ``mt) listEq projEq congrArgFn projNeProof + else + -- One list is a prefix of the other: use drop + cons_ne_nil + let nExpr := toExpr minLen + let dropFn := mkLambda `l .default listCharTy + (mkApp3 (mkConst ``List.drop [.zero]) charConst nExpr (mkBVar 0)) + let dropL1 := mkApp3 (mkConst ``List.drop [.zero]) charConst nExpr l₁Expr + let dropL2 := mkApp3 (mkConst ``List.drop [.zero]) charConst nExpr l₂Expr + let dropEq := mkApp3 (mkConst ``Eq [.succ .zero]) listCharTy dropL1 dropL2 + let congrArgFn := mkApp5 (mkConst ``congrArg [.succ .zero, .succ .zero]) + listCharTy listCharTy l₁Expr l₂Expr dropFn + -- The longer list's drop has a head element; the shorter list's drop is [] + let (hdChar, tlList) := + if l₁.length ≤ l₂.length then + let dropped := l₂.drop minLen + (dropped.head!, dropped.tail!) + else + let dropped := l₁.drop minLen + (dropped.head!, dropped.tail!) + let hdExpr := toExpr hdChar + let tlExpr := toExpr tlList + -- cons_ne_nil hd tl : hd :: tl ≠ [] + let consNeNil := mkApp3 (mkConst ``List.cons_ne_nil [.zero]) charConst hdExpr tlExpr + let dropNeProof := + if l₁.length ≤ l₂.length then + -- l₁ is shorter: drop l₁ = [], drop l₂ = hd :: tl, need [] ≠ hd :: tl + mkApp4 (mkConst ``Ne.symm [.succ .zero]) listCharTy + (mkApp3 (mkConst ``List.cons [.zero]) charConst hdExpr tlExpr) + (mkApp (mkConst ``List.nil [.zero]) charConst) + consNeNil + else + -- l₂ is shorter: drop l₁ = hd :: tl, drop l₂ = [], need hd :: tl ≠ [] + consNeNil + pure <| mkApp4 (mkConst ``mt) listEq dropEq congrArgFn dropNeProof + -- strNeProof : String.ofList l₁ ≠ String.ofList l₂ via mt ofList_injective listNeProof + let strType := mkConst ``String + let strEq := mkApp3 (mkConst ``Eq [.succ .zero]) strType + (mkApp (mkConst ``String.ofList) l₁Expr) (mkApp (mkConst ``String.ofList) l₂Expr) + return mkApp4 (mkConst ``mt) strEq listEq + (mkApp2 (mkConst ``String.ofList_injective) l₁Expr l₂Expr) listNeProof + +end Lean.Meta diff --git a/src/Lean/Meta/Sym/Simp/EvalGround.lean b/src/Lean/Meta/Sym/Simp/EvalGround.lean index 7971ffe07e..a63573f0bf 100644 --- a/src/Lean/Meta/Sym/Simp/EvalGround.lean +++ b/src/Lean/Meta/Sym/Simp/EvalGround.lean @@ -8,6 +8,7 @@ prelude public import Lean.Meta.Sym.Simp.SimpM import Init.Sym.Lemmas import Lean.Meta.Sym.LitValues +import Lean.Meta.StringLitProof namespace Lean.Meta.Sym.Simp /-! @@ -376,6 +377,25 @@ def evalFinPred (n : Expr) (trueThm falseThm : Expr) (op : {n : Nat} → Fin n open Lean.Sym +/-- +Evaluates `@Eq String a b` for string literal arguments, producing kernel-efficient proofs. +When equal, uses `eq_self` (no kernel evaluation needed). When different, uses +`mkStringLitNeProof` which finds the first differing character position and proves +inequality via `congrArg (List.get?Internal · i)`. +-/ +private def evalStringEq (a b : Expr) : SimpM Result := do + let some va := getStringValue? a | return .rfl + let some vb := getStringValue? b | return .rfl + if va = vb then + let e ← getTrueExpr + return .step e (mkApp2 (mkConst ``eq_self [.succ .zero]) (mkConst ``String) a) (done := true) + else + let neProof ← mkStringLitNeProof va vb + let proof := mkApp2 (mkConst ``eq_false) + (mkApp3 (mkConst ``Eq [.succ .zero]) (mkConst ``String) a b) neProof + let e ← getFalseExpr + return .step e proof (done := true) + def evalLT (α : Expr) (a b : Expr) : SimpM Result := match_expr α with | Nat => evalBinPred getNatValue? (mkConst ``Nat.lt_eq_true) (mkConst ``Nat.lt_eq_false) (. < .) a b @@ -434,7 +454,7 @@ def evalEq (α : Expr) (a b : Expr) : SimpM Result := | Fin n => evalFinPred n (mkConst ``Fin.eq_eq_true) (mkConst ``Fin.eq_eq_false) (. = .) a b | BitVec n => evalBitVecPred n (mkConst ``BitVec.eq_eq_true) (mkConst ``BitVec.eq_eq_false) (. = .) a b | Char => evalBinPred getCharValue? (mkConst ``Char.eq_eq_true) (mkConst ``Char.eq_eq_false) (. = .) a b - | String => evalBinPred getStringValue? (mkConst ``String.eq_eq_true) (mkConst ``String.eq_eq_false) (. = .) a b + | String => evalStringEq a b | _ => return .rfl def evalDvd (α : Expr) (a b : Expr) : SimpM Result := diff --git a/src/Lean/Meta/Tactic/Simp/BuiltinSimprocs/String.lean b/src/Lean/Meta/Tactic/Simp/BuiltinSimprocs/String.lean index 36bd3a8716..c22ab020bd 100644 --- a/src/Lean/Meta/Tactic/Simp/BuiltinSimprocs/String.lean +++ b/src/Lean/Meta/Tactic/Simp/BuiltinSimprocs/String.lean @@ -7,6 +7,7 @@ module prelude public import Lean.Meta.Tactic.Simp.BuiltinSimprocs.Char +import Lean.Meta.StringLitProof public section @@ -57,6 +58,7 @@ builtin_dsimproc [simp, seval] reduceSingleton (String.singleton _) := fun e => let some m ← fromExpr? e.appArg! | return .continue evalPropStep e (op n m) + @[inline] def reduceBoolPred (declName : Name) (arity : Nat) (op : String → String → Bool) (e : Expr) : SimpM DStep := do unless e.isAppOfArity declName arity do return .continue let some n ← fromExpr? e.appFn!.appArg! | return .continue @@ -67,8 +69,18 @@ builtin_simproc [simp, seval] reduceLT (( _ : String) < _) := reduceBinPred `` builtin_simproc [simp, seval] reduceLE (( _ : String) ≤ _) := reduceBinPred ``LE.le 4 (. ≤ .) builtin_simproc [simp, seval] reduceGT (( _ : String) > _) := reduceBinPred ``GT.gt 4 (. > .) builtin_simproc [simp, seval] reduceGE (( _ : String) ≥ _) := reduceBinPred ``GE.ge 4 (. ≥ .) -builtin_simproc [simp, seval] reduceEq (( _ : String) = _) := reduceBinPred ``Eq 3 (. = .) -builtin_simproc [simp, seval] reduceNe (( _ : String) ≠ _) := reduceBinPred ``Ne 3 (. ≠ .) + +builtin_simproc [simp, seval] reduceEq (( _ : String) = _) := fun e => do + unless e.isAppOfArity ``Eq 3 do return .continue + let some n ← fromExpr? e.appFn!.appArg! | return .continue + let some m ← fromExpr? e.appArg! | return .continue + evalEqPropStep e (n = m) (mkStringLitNeProof n m) + +builtin_simproc [simp, seval] reduceNe (( _ : String) ≠ _) := fun e => do + unless e.isAppOfArity ``Ne 3 do return .continue + let some n ← fromExpr? e.appFn!.appArg! | return .continue + let some m ← fromExpr? e.appArg! | return .continue + evalNePropStep e (n ≠ m) (mkStringLitNeProof n m) builtin_dsimproc [simp, seval] reduceBEq (( _ : String) == _) := reduceBoolPred ``BEq.beq 4 (. == .) builtin_dsimproc [simp, seval] reduceBNe (( _ : String) != _) := reduceBoolPred ``bne 4 (. != .) diff --git a/src/Lean/Meta/Tactic/Simp/BuiltinSimprocs/Util.lean b/src/Lean/Meta/Tactic/Simp/BuiltinSimprocs/Util.lean index 6aa44faa21..520ff19762 100644 --- a/src/Lean/Meta/Tactic/Simp/BuiltinSimprocs/Util.lean +++ b/src/Lean/Meta/Tactic/Simp/BuiltinSimprocs/Util.lean @@ -24,4 +24,40 @@ def evalPropStep (p : Expr) (result : Bool) : SimpM Step := do else return .done { expr := mkConst ``False, proof? := mkAppN (mkConst ``eq_false_of_decide) #[p, d.appArg!, (← mkEqRefl (mkConst ``false))] } +/-- +Like `evalPropStep`, but specialized for `@Eq.{u} α a b`. When the values are equal, uses +`eq_true rfl` which requires no kernel evaluation. When different, calls `mkNeProof` to build +a proof of `a ≠ b`. +-/ +def evalEqPropStep (e : Expr) (eq : Bool) (mkNeProof : SimpM Expr) : SimpM Step := do + let α := e.appFn!.appFn!.appArg! + let a := e.appFn!.appArg! + let u := e.appFn!.appFn!.appFn!.constLevels!.head! + if eq then + let proof := mkApp2 (mkConst ``eq_true) e (mkApp2 (mkConst ``Eq.refl [u]) α a) + return .done { expr := mkConst ``True, proof? := proof } + else + let neProof ← mkNeProof + let proof := mkApp2 (mkConst ``eq_false) e neProof + return .done { expr := mkConst ``False, proof? := proof } + +/-- +Like `evalPropStep`, but specialized for `@Ne.{u} α a b`. When the values differ, calls +`mkNeProof` to build a proof of `a ≠ b`. When equal, uses `eq_false (not_not_intro rfl)` +which requires no kernel evaluation. +-/ +def evalNePropStep (e : Expr) (ne : Bool) (mkNeProof : SimpM Expr) : SimpM Step := do + if ne then + let neProof ← mkNeProof + let proof := mkApp2 (mkConst ``eq_true) e neProof + return .done { expr := mkConst ``True, proof? := proof } + else + let α := e.appFn!.appFn!.appArg! + let a := e.appFn!.appArg! + let u := e.appFn!.appFn!.appFn!.constLevels!.head! + let eqProp := mkApp3 (mkConst ``Eq [u]) α a a + let rflExpr := mkApp2 (mkConst ``Eq.refl [u]) α a + let proof := mkApp2 (mkConst ``eq_false) e (mkApp2 (mkConst ``not_not_intro) eqProp rflExpr) + return .done { expr := mkConst ``False, proof? := proof } + end Lean.Meta.Simp diff --git a/tests/elab/string_neq_kernel_cost.lean b/tests/elab/string_neq_kernel_cost.lean new file mode 100644 index 0000000000..39d394ac66 --- /dev/null +++ b/tests/elab/string_neq_kernel_cost.lean @@ -0,0 +1,65 @@ +/-! +Tests that string equality/inequality simprocs produce kernel-efficient proofs. + +Previously, `String.reduceNe` and `String.reduceEq` used `decide` with `String.decEq`, which +forced the kernel to run UTF-8 encoding/decoding and byte array comparison. This caused a high +number of kernel unfoldings (e.g. 86 for `List.rec` on short strings). + +The fix uses `String.ofList_injective` to reduce to `List Char` comparison, which is much +cheaper in the kernel since string literals are `String.ofList [chars]` at the kernel level. +-/ + +-- String.reduceNe: different strings (the primary use case) +-- Kernel should NOT unfold ByteArray/utf8 functions +set_option diagnostics true in +set_option diagnostics.threshold 16 in +/-- -/ +#guard_msgs in +example : "hello" ≠ "foo" := by simp + +-- String.reduceNe: equal strings +set_option diagnostics true in +set_option diagnostics.threshold 16 in +/-- -/ +#guard_msgs in +example : "hello" ≠ "hello" ↔ False := by simp + +-- String.reduceEq: equal strings +set_option diagnostics true in +set_option diagnostics.threshold 16 in +/-- -/ +#guard_msgs in +example : "hello" = "hello" := by simp + +-- String.reduceEq: different strings +set_option diagnostics true in +set_option diagnostics.threshold 16 in +/-- -/ +#guard_msgs in +example : "hello" = "foo" ↔ False := by simp + +-- Corner case: empty string vs nonempty +set_option diagnostics true in +set_option diagnostics.threshold 16 in +/-- -/ +#guard_msgs in +example : "" ≠ "a" := by simp + +set_option diagnostics true in +set_option diagnostics.threshold 16 in +/-- -/ +#guard_msgs in +example : "a" ≠ "" := by simp + +-- Corner case: one string is a prefix of the other +set_option diagnostics true in +set_option diagnostics.threshold 16 in +/-- -/ +#guard_msgs in +example : "foo" ≠ "foobar" := by simp + +set_option diagnostics true in +set_option diagnostics.threshold 16 in +/-- -/ +#guard_msgs in +example : "foobar" ≠ "foo" := by simp