Compare commits

...

5 Commits

Author SHA1 Message Date
Leonardo de Moura
669799962b chore: add entry point 2025-12-31 20:19:55 -08:00
Leonardo de Moura
95dd0b4ba3 doc: SimpM 2025-12-31 20:19:55 -08:00
Leonardo de Moura
1d66aa7a25 feat: add inferType with cache 2025-12-31 20:19:55 -08:00
Leonardo de Moura
1f0d4d5545 feat: add SimpM 2025-12-31 20:19:55 -08:00
Leonardo de Moura
2defd6a913 feat: add congrFun' 2025-12-31 20:19:55 -08:00
7 changed files with 240 additions and 1 deletions

View File

@@ -375,6 +375,10 @@ theorem congr {α : Sort u} {β : Sort v} {f₁ f₂ : α → β} {a₁ a₂ :
theorem congrFun {α : Sort u} {β : α Sort v} {f g : (x : α) β x} (h : Eq f g) (a : α) : Eq (f a) (g a) :=
h rfl
/-- Similar to `congrFun` but `β` does not depend on `α`. -/
theorem congrFun' {α : Sort u} {β : Sort v} {f g : α β} (h : Eq f g) (a : α) : Eq (f a) (g a) :=
h rfl
/-!
Initialize the Quotient Module, which effectively adds the following definitions:
```

View File

@@ -19,6 +19,9 @@ public import Lean.Meta.Sym.ProofInstInfo
public import Lean.Meta.Sym.AbstractS
public import Lean.Meta.Sym.Pattern
public import Lean.Meta.Sym.Apply
public import Lean.Meta.Sym.InferType
public import Lean.Meta.Sym.SimpM
public import Lean.Meta.Sym.Simp
/-!
# Symbolic simulation support.

View File

@@ -0,0 +1,20 @@
/-
Copyright (c) 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Leonardo de Moura
-/
module
prelude
public import Lean.Meta.Sym.SymM
namespace Lean.Meta.Sym
/-- Returns the type of `e`. -/
def inferType (e : Expr) : SymM Expr := do
if let some type := ( get).inferType.find? { expr := e } then
return type
else
let type Meta.inferType e
modify fun s => { s with inferType := s.inferType.insert { expr := e } type }
return type
end Lean.Meta.Sym

View File

@@ -29,7 +29,7 @@ public def mkProofInstInfo? (declName : Name) : MetaM (Option ProofInstInfo) :=
let mut argsInfo := #[]
let mut found := false
for x in xs do
let type inferType x
let type Meta.inferType x
let isInstance := isClass? env type |>.isSome
let isProof isProp type
if isInstance || isProof then

View File

@@ -0,0 +1,15 @@
/-
Copyright (c) 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Leonardo de Moura
-/
module
prelude
public import Lean.Meta.Sym.SimpM
namespace Lean.Meta.Sym.Simp
@[export lean_sym_simp]
def simpImpl (e : Expr) : SimpM Result := do
throwError "NIY {e}"
end Lean.Meta.Sym.Simp

View File

@@ -0,0 +1,168 @@
/-
Copyright (c) 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Leonardo de Moura
-/
module
prelude
public import Lean.Meta.Sym.SymM
public import Lean.Meta.Sym.Pattern
public section
namespace Lean.Meta.Sym.Simp
/-!
# Structural Simplifier for Symbolic Simulation
It is a specialized simplifier designed for symbolic simulation workloads.
It addresses performance bottlenecks identified in the standard `simp` tactic
when applied to large terms typical in symbolic execution.
## Design Goals
1. **Efficient caching** via pointer-based keys on maximally shared terms
2. **Fast pattern matching** using the `Pattern` infrastructure instead of `isDefEq`
3. **Minimal proof term overhead** by using `shareCommon` and efficient congruence lemma application
## Key Performance Problems Addressed
### 1. Cache Inefficiency (Hash Collisions)
The standard simplifier uses structural equality for cache keys. With large terms,
hash collisions cause O(n) comparisons that fail, leading to O(n²) behavior.
**Solution:** Require maximally shared input terms (`shareCommon`) and use
pointer-based cache keys. Structurally equal terms have equal pointers, making
cache lookup O(1).
### 2. `isDefEq` in Rewrite Matching
Profiling shows that `isDefEq` dominates simplification time in many workloads.
For each candidate rewrite rule, definitional equality checking with metavariable unification
is performed, and sometimes substantial time is spent checking whether the scopes are
compatible.
**Solution:** Use the `Pattern` infrastructure for syntactic matching:
- No metavariable creation or assignment
- No occurs check (`CheckAssignment`)
- Direct de Bruijn index comparison
### 3. `inferType` in Proof Construction
In the standard simplifier, building `Eq.trans` and `congrArg` proofs uses `inferType` on proof terms,
which reconstructs large expressions and destroys sharing. It often causes O(n²) allocation.
**Solution:**
- Never perform `inferType` on proof terms when constructing congruence and transitivity proofs.
- Use a pointer-based cache for `inferType` on terms.
### 4. `funext` Proof Explosion
Nested `funext` applications create O(n²) proof terms due to implicit arguments
`{f} {g}` of size O(n) repeated n times.
**Solution:** Generate `funext_k` for k binders at once, stating the implicit
arguments only once.
### 5. Binder Re-entry Cache Invalidation
When a simplification theorem restructures a lambda, re-entering creates a fresh
free variable, invalidating all cached results for subterms.
**Solution:** Reuse free variables across binder re-entry when safe:
- Stack-based approach: reuse when types match on re-entry path
- Instance-aware: track local type class instances separately from hypotheses
- If simplifier doesn't discharge hypotheses from local context, only instances matter
### 6. Contextual `ite` Handling
The standard `ite_congr` theorem adds hypotheses when entering branches,
invalidating the cache and causing O(2^n) behavior on conditional trees.
**Solution:** Non-contextual `ite` handling for symbolic simulation:
- Only simplify the condition
- If condition reduces to `True`/`False`, take the appropriate branch
- Otherwise, keep `ite` symbolic (let the simulator handle case-splitting)
## Architecture
### Input Requirements
- Terms must be maximally shared via `shareCommon` like every other module in `Sym`.
- Enables pointer-based cache keys throughout
### Integration with SymM
`SimpM` is designed to work within the `SymM` symbolic simulation framework:
- Uses `BackwardRule` for control-flow (monadic bind, match, combinators)
- `SimpM` handles term simplification between control-flow steps
- Avoids entering control-flow binders
-/
/-- Configuration options for the structural simplifier. -/
structure Config where
/-- If `true`, unfold let-bindings (zeta reduction) during simplification. -/
zetaDelta : Bool := true
-- **TODO**: many are still missing
/-- The result of simplifying some expression `e`. -/
structure Result where
/-- The simplified version of `e` -/
expr : Expr
/-- A proof that `e = expr`, where the simplified expression is on the RHS.
If `none`, the proof is assumed to be `refl`. -/
proof? : Option Expr := none
/--
A simplification theorem for the structural simplifier.
Contains both the theorem expression and a precomputed pattern for efficient unification
during rewriting.
-/
structure Theorem where
/-- The theorem expression, typically `Expr.const declName` for a named theorem. -/
expr : Expr
/-- Precomputed pattern extracted from the theorem's type for efficient matching. -/
pattern : Pattern
/-- Collection of simplification theorems available to the simplifier. -/
structure Theorems where
/-- **TODO**: No indexing for now. We will add a structural discrimination tree later. -/
thms : Array Theorem := #[]
/-- Read-only context for the simplifier. -/
structure Context where
/-- Available simplification theorems. -/
thms : Theorems := {}
/-- Simplifier configuration options. -/
config : Config := {}
/-- Size of the local context when simplification started.
Used to determine which free variables were introduced during simplification. -/
initialLCtxSize : Nat
/-- Cache mapping expressions (by pointer equality) to their simplified results. -/
abbrev Cache := PHashMap ExprPtr Result
/-- Mutable state for the simplifier. -/
structure State where
/-- Cache of previously simplified expressions to avoid redundant work. -/
cache : Cache := {}
/-- Stack of free variables available for reuse when re-entering binders.
Each entry is (type pointer, fvarId). -/
binderStack : List (ExprPtr × FVarId) := []
/-- Monad for the structural simplifier, layered on top of `SymM`. -/
abbrev SimpM := ReaderT Context StateRefT State SymM
instance : Inhabited (SimpM α) where
default := throwError "<default>"
/-- Runs a `SimpM` computation with the given theorems and configuration. -/
abbrev SimpM.run (x : SimpM α) (thms : Theorems := {}) (config : Config := {}) : SymM α := do
let initialLCtxSize := ( getLCtx).decls.size
x { initialLCtxSize, thms, config } |>.run' {}
@[extern "lean_sym_simp"] -- Forward declaration
opaque simp (e : Expr) : SimpM Result
end Lean.Meta.Sym.Simp

View File

@@ -35,6 +35,28 @@ public structure ProofInstInfo where
argsInfo : Array ProofInstArgInfo
deriving Inhabited
/--
Information on how to build congruence proofs for function applications.
This enables efficient rewriting of subterms without repeatedly inferring types or instances.
-/
inductive CongrInfo where
| /--
For functions with a fixed prefix of implicit/instance arguments followed by
explicit non-dependent arguments that can be rewritten independently.
- `prefixSize`: Number of leading arguments (types, instances) that are determined
by the suffix arguments and should not be rewritten directly.
- `suffixSize`: Number of trailing arguments that can be rewritten using simple congruence.
Examples (showing `prefixSize`, `suffixSize`):
- `HAdd.hAdd {α β γ} [HAdd α β γ] (a : α) (b : β)`: `(4, 2)` — rewrite `a` and `b`
- `And (p q : Prop)`: `(0, 2)` — rewrite both propositions
- `Eq {α} (a b : α)`: `(1, 2)` — rewrite `a` and `b`, type `α` is fixed
- `Neg.neg {α} [Neg α] (a : α)`: `(2, 1)` — rewrite just `a`
-/
simple (prefixSize : Nat) (suffixSize : Nat)
-- **TODO**: Add other kinds
structure State where
/--
Maps expressions to their maximal free variable (by declaration index).
@@ -56,6 +78,13 @@ structure State where
-/
maxFVar : PHashMap ExprPtr (Option FVarId) := {}
proofInstInfo : PHashMap Name (Option ProofInstInfo) := {}
/--
Cache for `inferType` results, keyed by pointer equality.
`SymM` uses a fixed configuration, so we can use a simpler key than `MetaM`.
Remark: type inference is a bottleneck on `Meta.Tactic.Simp` simplifier.
-/
inferType : PHashMap ExprPtr Expr := {}
congrInfo : PHashMap ExprPtr CongrInfo := {}
abbrev SymM := ReaderT Grind.Params StateRefT State Grind.GrindM