chore: add entry point

doc: SimpM
feat: add inferType with cache
2026-04-03 18:54:08 +00:00 · 2025-12-31 20:19:55 -08:00 · 2025-12-31 20:19:55 -08:00 · 2025-12-31 20:19:55 -08:00 · 2025-12-31 20:19:55 -08:00 · 2025-12-31 20:19:55 -08:00
7 changed files with 240 additions and 1 deletions
--- a/src/Init/Prelude.lean
+++ b/src/Init/Prelude.lean
@@ -375,6 +375,10 @@ theorem congr {α : Sort u} {β : Sort v} {f₁ f₂ : α → β} {a₁ a₂ :
 theorem congrFun {α : Sort u} {β : α → Sort v} {f g : (x : α) → β x} (h : Eq f g) (a : α) : Eq (f a) (g a) :=
  h ▸ rfl

+/-- Similar to `congrFun` but `β` does not depend on `α`. -/
+theorem congrFun' {α : Sort u} {β : Sort v} {f g : α → β} (h : Eq f g) (a : α) : Eq (f a) (g a) :=
+  h ▸ rfl
+
 /-!
 Initialize the Quotient Module, which effectively adds the following definitions:
 ```
--- a/src/Lean/Meta/Sym.lean
+++ b/src/Lean/Meta/Sym.lean
@@ -19,6 +19,9 @@ public import Lean.Meta.Sym.ProofInstInfo
 public import Lean.Meta.Sym.AbstractS
 public import Lean.Meta.Sym.Pattern
 public import Lean.Meta.Sym.Apply
+public import Lean.Meta.Sym.InferType
+public import Lean.Meta.Sym.SimpM
+public import Lean.Meta.Sym.Simp

 /-!
 # Symbolic simulation support.
--- a/src/Lean/Meta/Sym/InferType.lean
+++ b/src/Lean/Meta/Sym/InferType.lean
@@ -0,0 +1,20 @@
+/-
+Copyright (c) 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Leonardo de Moura
+-/
+module
+prelude
+public import Lean.Meta.Sym.SymM
+namespace Lean.Meta.Sym
+
+/-- Returns the type of `e`. -/
+def inferType (e : Expr) : SymM Expr := do
+  if let some type := (← get).inferType.find? { expr := e } then
+    return type
+  else
+    let type ← Meta.inferType e
+    modify fun s => { s with inferType := s.inferType.insert { expr := e } type }
+    return type
+
+end Lean.Meta.Sym
--- a/src/Lean/Meta/Sym/ProofInstInfo.lean
+++ b/src/Lean/Meta/Sym/ProofInstInfo.lean
@@ -29,7 +29,7 @@ public def mkProofInstInfo? (declName : Name) : MetaM (Option ProofInstInfo) :=
    let mut argsInfo := #[]
    let mut found := false
    for x in xs do
-      let type ← inferType x
+      let type ← Meta.inferType x
      let isInstance := isClass? env type |>.isSome
      let isProof ← isProp type
      if isInstance || isProof then
--- a/src/Lean/Meta/Sym/Simp.lean
+++ b/src/Lean/Meta/Sym/Simp.lean
@@ -0,0 +1,15 @@
+/-
+Copyright (c) 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Leonardo de Moura
+-/
+module
+prelude
+public import Lean.Meta.Sym.SimpM
+namespace Lean.Meta.Sym.Simp
+
+@[export lean_sym_simp]
+def simpImpl (e : Expr) : SimpM Result := do
+  throwError "NIY {e}"
+
+end Lean.Meta.Sym.Simp
--- a/src/Lean/Meta/Sym/SimpM.lean
+++ b/src/Lean/Meta/Sym/SimpM.lean
@@ -0,0 +1,168 @@
+/-
+Copyright (c) 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Leonardo de Moura
+-/
+module
+prelude
+public import Lean.Meta.Sym.SymM
+public import Lean.Meta.Sym.Pattern
+public section
+namespace Lean.Meta.Sym.Simp
+
+/-!
+# Structural Simplifier for Symbolic Simulation
+
+It is a specialized simplifier designed for symbolic simulation workloads.
+It addresses performance bottlenecks identified in the standard `simp` tactic
+when applied to large terms typical in symbolic execution.
+
+## Design Goals
+
+1. **Efficient caching** via pointer-based keys on maximally shared terms
+2. **Fast pattern matching** using the `Pattern` infrastructure instead of `isDefEq`
+3. **Minimal proof term overhead** by using `shareCommon` and efficient congruence lemma application
+
+## Key Performance Problems Addressed
+
+### 1. Cache Inefficiency (Hash Collisions)
+
+The standard simplifier uses structural equality for cache keys. With large terms,
+hash collisions cause O(n) comparisons that fail, leading to O(n²) behavior.
+
+**Solution:** Require maximally shared input terms (`shareCommon`) and use
+pointer-based cache keys. Structurally equal terms have equal pointers, making
+cache lookup O(1).
+
+### 2. `isDefEq` in Rewrite Matching
+
+Profiling shows that `isDefEq` dominates simplification time in many workloads.
+For each candidate rewrite rule, definitional equality checking with metavariable unification
+is performed, and sometimes substantial time is spent checking whether the scopes are
+compatible.
+
+**Solution:** Use the `Pattern` infrastructure for syntactic matching:
+- No metavariable creation or assignment
+- No occurs check (`CheckAssignment`)
+- Direct de Bruijn index comparison
+
+### 3. `inferType` in Proof Construction
+
+In the standard simplifier, building `Eq.trans` and `congrArg` proofs uses `inferType` on proof terms,
+which reconstructs large expressions and destroys sharing. It often causes O(n²) allocation.
+
+**Solution:**
+- Never perform `inferType` on proof terms when constructing congruence and transitivity proofs.
+- Use a pointer-based cache for `inferType` on terms.
+
+### 4. `funext` Proof Explosion
+
+Nested `funext` applications create O(n²) proof terms due to implicit arguments
+`{f} {g}` of size O(n) repeated n times.
+
+**Solution:** Generate `funext_k` for k binders at once, stating the implicit
+arguments only once.
+
+### 5. Binder Re-entry Cache Invalidation
+
+When a simplification theorem restructures a lambda, re-entering creates a fresh
+free variable, invalidating all cached results for subterms.
+
+**Solution:** Reuse free variables across binder re-entry when safe:
+- Stack-based approach: reuse when types match on re-entry path
+- Instance-aware: track local type class instances separately from hypotheses
+- If simplifier doesn't discharge hypotheses from local context, only instances matter
+
+### 6. Contextual `ite` Handling
+
+The standard `ite_congr` theorem adds hypotheses when entering branches,
+invalidating the cache and causing O(2^n) behavior on conditional trees.
+
+**Solution:** Non-contextual `ite` handling for symbolic simulation:
+- Only simplify the condition
+- If condition reduces to `True`/`False`, take the appropriate branch
+- Otherwise, keep `ite` symbolic (let the simulator handle case-splitting)
+
+## Architecture
+
+### Input Requirements
+
+- Terms must be maximally shared via `shareCommon` like every other module in `Sym`.
+- Enables pointer-based cache keys throughout
+
+### Integration with SymM
+
+`SimpM` is designed to work within the `SymM` symbolic simulation framework:
+- Uses `BackwardRule` for control-flow (monadic bind, match, combinators)
+- `SimpM` handles term simplification between control-flow steps
+- Avoids entering control-flow binders
+-/
+
+
+/-- Configuration options for the structural simplifier. -/
+structure Config where
+  /-- If `true`, unfold let-bindings (zeta reduction) during simplification. -/
+  zetaDelta : Bool := true
+  -- **TODO**: many are still missing
+
+/-- The result of simplifying some expression `e`. -/
+structure Result where
+  /-- The simplified version of `e` -/
+  expr           : Expr
+  /-- A proof that `e = expr`, where the simplified expression is on the RHS.
+  If `none`, the proof is assumed to be `refl`. -/
+  proof?         : Option Expr := none
+
+/--
+A simplification theorem for the structural simplifier.
+
+Contains both the theorem expression and a precomputed pattern for efficient unification
+during rewriting.
+-/
+structure Theorem where
+  /-- The theorem expression, typically `Expr.const declName` for a named theorem. -/
+  expr      : Expr
+  /-- Precomputed pattern extracted from the theorem's type for efficient matching. -/
+  pattern   : Pattern
+
+/-- Collection of simplification theorems available to the simplifier. -/
+structure Theorems where
+  /-- **TODO**: No indexing for now. We will add a structural discrimination tree later. -/
+  thms : Array Theorem := #[]
+
+/-- Read-only context for the simplifier. -/
+structure Context where
+  /-- Available simplification theorems. -/
+  thms   : Theorems := {}
+  /-- Simplifier configuration options. -/
+  config : Config := {}
+  /-- Size of the local context when simplification started.
+  Used to determine which free variables were introduced during simplification. -/
+  initialLCtxSize : Nat
+
+/-- Cache mapping expressions (by pointer equality) to their simplified results. -/
+abbrev Cache := PHashMap ExprPtr Result
+
+/-- Mutable state for the simplifier. -/
+structure State where
+  /-- Cache of previously simplified expressions to avoid redundant work. -/
+  cache : Cache := {}
+  /-- Stack of free variables available for reuse when re-entering binders.
+  Each entry is (type pointer, fvarId). -/
+  binderStack : List (ExprPtr × FVarId) := []
+
+/-- Monad for the structural simplifier, layered on top of `SymM`. -/
+abbrev SimpM := ReaderT Context StateRefT State SymM
+
+instance : Inhabited (SimpM α) where
+  default := throwError "<default>"
+
+/-- Runs a `SimpM` computation with the given theorems and configuration. -/
+abbrev SimpM.run (x : SimpM α) (thms : Theorems := {}) (config : Config := {}) : SymM α := do
+  let initialLCtxSize := (← getLCtx).decls.size
+  x { initialLCtxSize, thms, config } |>.run' {}
+
+@[extern "lean_sym_simp"] -- Forward declaration
+opaque simp (e : Expr) : SimpM Result
+
+end Lean.Meta.Sym.Simp
--- a/src/Lean/Meta/Sym/SymM.lean
+++ b/src/Lean/Meta/Sym/SymM.lean
@@ -35,6 +35,28 @@ public structure ProofInstInfo where
  argsInfo : Array ProofInstArgInfo
  deriving Inhabited

+/--
+Information on how to build congruence proofs for function applications.
+This enables efficient rewriting of subterms without repeatedly inferring types or instances.
+-/
+inductive CongrInfo where
+  | /--
+    For functions with a fixed prefix of implicit/instance arguments followed by
+    explicit non-dependent arguments that can be rewritten independently.
+
+    - `prefixSize`: Number of leading arguments (types, instances) that are determined
+      by the suffix arguments and should not be rewritten directly.
+    - `suffixSize`: Number of trailing arguments that can be rewritten using simple congruence.
+
+    Examples (showing `prefixSize`, `suffixSize`):
+    - `HAdd.hAdd {α β γ} [HAdd α β γ] (a : α) (b : β)`: `(4, 2)` — rewrite `a` and `b`
+    - `And (p q : Prop)`: `(0, 2)` — rewrite both propositions
+    - `Eq {α} (a b : α)`: `(1, 2)` — rewrite `a` and `b`, type `α` is fixed
+    - `Neg.neg {α} [Neg α] (a : α)`: `(2, 1)` — rewrite just `a`
+    -/
+    simple (prefixSize : Nat) (suffixSize : Nat)
+  -- **TODO**: Add other kinds
+
 structure State where
  /--
  Maps expressions to their maximal free variable (by declaration index).
@@ -56,6 +78,13 @@ structure State where
  -/
  maxFVar : PHashMap ExprPtr (Option FVarId) := {}
  proofInstInfo : PHashMap Name (Option ProofInstInfo) := {}
+  /--
+  Cache for `inferType` results, keyed by pointer equality.
+  `SymM` uses a fixed configuration, so we can use a simpler key than `MetaM`.
+  Remark: type inference is a bottleneck on `Meta.Tactic.Simp` simplifier.
+  -/
+  inferType : PHashMap ExprPtr Expr := {}
+  congrInfo : PHashMap ExprPtr CongrInfo := {}

 abbrev SymM := ReaderT Grind.Params StateRefT State Grind.GrindM
Author	SHA1	Message	Date
Leonardo de Moura	669799962b	chore: add entry point	2025-12-31 20:19:55 -08:00
Leonardo de Moura	95dd0b4ba3	doc: `SimpM`	2025-12-31 20:19:55 -08:00
Leonardo de Moura	1d66aa7a25	feat: add `inferType` with cache	2025-12-31 20:19:55 -08:00
Leonardo de Moura	1f0d4d5545	feat: add `SimpM`	2025-12-31 20:19:55 -08:00
Leonardo de Moura	2defd6a913	feat: add `congrFun'`	2025-12-31 20:19:55 -08:00