Compare commits

...

7 Commits

Author SHA1 Message Date
Kim Morrison
ac0f4b9102 comment out slow test 2025-09-23 07:53:40 +02:00
Kim Morrison
6a54e66587 fix merge 2025-09-23 07:18:27 +02:00
Thomas Zhu
9d8f76776c Suggested changes 2025-09-23 07:18:25 +02:00
Thomas Zhu
7bb922be6e style 2025-09-23 07:18:23 +02:00
Thomas Zhu
5d566ca313 use environment extension for blacklist 2025-09-23 07:18:18 +02:00
Thomas Zhu
79f81b9ee3 Tune MePo 2025-09-23 07:18:15 +02:00
Kim Morrison
051c8e874c naive implementation of mepo
fix

imports
2025-09-23 07:18:14 +02:00
5 changed files with 302 additions and 133 deletions

View File

@@ -8,5 +8,3 @@ module
prelude
public import Lean.Data.NameMap.Basic
public import Lean.Data.NameMap.AdditionalOperations
public section

View File

@@ -6,134 +6,5 @@ Authors: Kim Morrison
module
prelude
public import Lean.Elab.Command
public import Lean.Meta.Eval
public import Lean.Meta.CompletionName
public import Init.Data.Random
public section
/-!
# An API for premise selection algorithms.
This module provides a basic API for premise selection algorithms,
which are used to suggest identifiers that should be introduced in a proof.
The core interface is the `Selector` type, which is a function from a metavariable
and a configuration to a list of suggestions.
The `Selector` is registered as an environment extension, and the trivial (no suggestions) implementation
is `Lean.PremiseSelection.empty`.
Lean does not provide a default premise selector, so this module is intended to be used in conjunction
with a downstream package which registers a premise selector.
-/
namespace Lean.PremiseSelection
/--
A `Suggestion` is essentially just an identifier and a confidence score that the identifier is relevant.
If the premise selection request included information about the intended use (e.g. in the simplifier, in `grind`, etc.)
the score may be adjusted for that application.
-/
structure Suggestion where
name : Name
/--
The score of the suggestion, as a probability that this suggestion should be used.
-/
score : Float
structure Config where
/--
The maximum number of suggestions to return.
-/
maxSuggestions : Option Nat := none
/--
The tactic that is calling the premise selection, e.g. `simp`, `grind`, or `aesop`.
This may be used to adjust the score of the suggestions
-/
caller : Option Name := none
/--
A filter on suggestions; only suggestions returning `true` should be returned.
(It can be better to filter on the premise selection side, to ensure that enough suggestions are returned.)
-/
filter : Name MetaM Bool := fun _ => pure true
/--
An optional arbitrary "hint" to the premise selection algorithm.
There is no guarantee that the algorithm will make any use of the hint.
Potential use cases include a natural language comment provided by the user
(e.g. allowing use of the premise selector as a search engine)
or including context from the current proof and/or file.
We may later split these use cases into separate fields if necessary.
-/
hint : Option String := none
abbrev Selector : Type := MVarId Config MetaM (Array Suggestion)
/--
The trivial premise selector, which returns no suggestions.
-/
def empty : Selector := fun _ _ => pure #[]
/-- A random premise selection algorithm, provided solely for testing purposes. -/
def random (gen : StdGen := 37, 59) : Selector := fun _ cfg => do
IO.stdGenRef.set gen
let env getEnv
let max := cfg.maxSuggestions.getD 10
let consts := env.const2ModIdx.keysArray
let mut suggestions := #[]
while suggestions.size < max do
let i IO.rand 0 consts.size
let name := consts[i]!
if ! (`Lean).isPrefixOf name && Lean.Meta.allowCompletion env name then
suggestions := suggestions.push { name := name, score := 1.0 / consts.size.toFloat }
return suggestions
initialize premiseSelectorExt : EnvExtension (Option Selector)
registerEnvExtension (pure none)
/-- Generate premise suggestions for the given metavariable, using the currently registered premise selector. -/
def select (m : MVarId) (c : Config := {}) : MetaM (Array Suggestion) := do
let some selector := premiseSelectorExt.getState ( getEnv) |
throwError "No premise selector registered. \
(Note the Lean does not provide a default premise selector, these must be installed by a downstream library.)"
selector m c
/-!
Currently the registration mechanism is just global state.
This means that if multiple modules register premise selectors,
the behaviour will be dependent on the order of loading modules.
We should replace this with a mechanism so that
premise selectors are configured via options in the `lakefile`, and
commands are only used to override in a single declaration or file.
-/
/-- Set the current premise selector.-/
def registerPremiseSelector (selector : Selector) : CoreM Unit := do
modifyEnv fun env => premiseSelectorExt.setState env (some selector)
open Lean Elab Command in
@[builtin_command_elab setPremiseSelectorCmd, inherit_doc setPremiseSelectorCmd]
def elabSetPremiseSelector : CommandElab
| `(command| set_premise_selector $selector) => do
let selector liftTermElabM do
try
let selectorTerm Term.elabTermEnsuringType selector (some (Expr.const ``Selector []))
unsafe Meta.evalExpr Selector (Expr.const ``Selector []) selectorTerm
catch _ =>
throwError "Failed to elaborate {selector} as a `MVarId → Config → MetaM (Array Suggestion)`."
liftCoreM (registerPremiseSelector selector)
| _ => throwUnsupportedSyntax
open Lean.Elab.Tactic in
@[builtin_tactic Lean.Parser.Tactic.suggestPremises] def evalSuggestPremises : Tactic := fun _ =>
liftMetaTactic1 fun mvarId => do
let suggestions select mvarId
logInfo m!"Premise suggestions: {suggestions.map (·.name)}"
return mvarId
end Lean.PremiseSelection
import Lean.PremiseSelection.Basic
import Lean.PremiseSelection.MePo

View File

@@ -0,0 +1,180 @@
/-
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Kim Morrison
-/
module
prelude
public import Lean.Elab.Command
public import Lean.Meta.Eval
public import Lean.Meta.CompletionName
public import Init.Data.Random
/-!
# An API for premise selection algorithms.
This module provides a basic API for premise selection algorithms,
which are used to suggest identifiers that should be introduced in a proof.
The core interface is the `Selector` type, which is a function from a metavariable
and a configuration to a list of suggestions.
The `Selector` is registered as an environment extension, and the trivial (no suggestions) implementation
is `Lean.PremiseSelection.empty`.
Lean does not provide a default premise selector, so this module is intended to be used in conjunction
with a downstream package which registers a premise selector.
-/
@[expose] public section
namespace Lean.PremiseSelection
/--
A `Suggestion` is essentially just an identifier and a confidence score that the identifier is relevant.
If the premise selection request included information about the intended use (e.g. in the simplifier, in `grind`, etc.)
the score may be adjusted for that application.
-/
structure Suggestion where
name : Name
/--
The score of the suggestion, as a probability that this suggestion should be used.
-/
score : Float
structure Config where
/--
The maximum number of suggestions to return.
-/
maxSuggestions : Option Nat := none
/--
The tactic that is calling the premise selection, e.g. `simp`, `grind`, or `aesop`.
This may be used to adjust the score of the suggestions
-/
caller : Option Name := none
/--
A filter on suggestions; only suggestions returning `true` should be returned.
(It can be better to filter on the premise selection side, to ensure that enough suggestions are returned.)
-/
filter : Name MetaM Bool := fun _ => pure true
/--
An optional arbitrary "hint" to the premise selection algorithm.
There is no guarantee that the algorithm will make any use of the hint.
Potential use cases include a natural language comment provided by the user
(e.g. allowing use of the premise selector as a search engine)
or including context from the current proof and/or file.
We may later split these use cases into separate fields if necessary.
-/
hint : Option String := none
abbrev Selector : Type := MVarId Config MetaM (Array Suggestion)
section DenyList
/-- Premises from a module whose name has one of the following components are not retrieved. -/
builtin_initialize moduleDenyListExt : SimplePersistentEnvExtension String (List String)
registerSimplePersistentEnvExtension {
addEntryFn := (·.cons)
addImportedFn := mkStateFromImportedEntries (·.cons) []
}
/-- A premise whose name has one of the following components is not retrieved. -/
builtin_initialize nameDenyListExt : SimplePersistentEnvExtension String (List String)
registerSimplePersistentEnvExtension {
addEntryFn := (·.cons)
addImportedFn := mkStateFromImportedEntries (·.cons) []
}
/-- A premise whose `type.getForallBody.getAppFn` is a constant that has one of these prefixes is not retrieved. -/
builtin_initialize typePrefixDenyListExt : SimplePersistentEnvExtension Name (List Name)
registerSimplePersistentEnvExtension {
addEntryFn := (·.cons)
addImportedFn := mkStateFromImportedEntries (·.cons) []
}
def isDeniedModule (env : Environment) (moduleName : Name) : Bool :=
(moduleDenyListExt.getState env).any fun p => moduleName.anyS (· == p)
def isDeniedPremise (env : Environment) (name : Name) : Bool := Id.run do
if name == ``sorryAx then return true
if name.isInternalDetail then return true
if (nameDenyListExt.getState env).any (fun p => name.anyS (· == p)) then return true
if let some moduleIdx := env.getModuleIdxFor? name then
let moduleName := env.header.moduleNames[moduleIdx.toNat]!
if isDeniedModule env moduleName then
return true
let some ci := env.find? name | return true
if let .const fnName _ := ci.type.getForallBody.getAppFn then
if (typePrefixDenyListExt.getState env).any (fun p => p.isPrefixOf fnName) then return true
return false
end DenyList
/--
The trivial premise selector, which returns no suggestions.
-/
def empty : Selector := fun _ _ => pure #[]
/-- A random premise selection algorithm, provided solely for testing purposes. -/
def random (gen : StdGen := 37, 59) : Selector := fun _ cfg => do
IO.stdGenRef.set gen
let env getEnv
let max := cfg.maxSuggestions.getD 10
let consts := env.const2ModIdx.keysArray
let mut suggestions := #[]
while suggestions.size < max do
let i IO.rand 0 consts.size
let name := consts[i]!
unless isDeniedPremise env name do
suggestions := suggestions.push { name := name, score := 1.0 / consts.size.toFloat }
return suggestions
builtin_initialize premiseSelectorExt : EnvExtension (Option Selector)
registerEnvExtension (pure none)
/-- Generate premise suggestions for the given metavariable, using the currently registered premise selector. -/
def select (m : MVarId) (c : Config := {}) : MetaM (Array Suggestion) := do
let some selector := premiseSelectorExt.getState ( getEnv) |
throwError "No premise selector registered. \
(Note the Lean does not provide a default premise selector, these must be installed by a downstream library.)"
selector m c
/-!
Currently the registration mechanism is just global state.
This means that if multiple modules register premise selectors,
the behaviour will be dependent on the order of loading modules.
We should replace this with a mechanism so that
premise selectors are configured via options in the `lakefile`, and
commands are only used to override in a single declaration or file.
-/
/-- Set the current premise selector.-/
def registerPremiseSelector (selector : Selector) : CoreM Unit := do
modifyEnv fun env => premiseSelectorExt.setState env (some selector)
open Lean Elab Command in
@[builtin_command_elab setPremiseSelectorCmd, inherit_doc setPremiseSelectorCmd]
def elabSetPremiseSelector : CommandElab
| `(command| set_premise_selector $selector) => do
let selector liftTermElabM do
try
let selectorTerm Term.elabTermEnsuringType selector (some (Expr.const ``Selector []))
unsafe Meta.evalExpr Selector (Expr.const ``Selector []) selectorTerm
catch _ =>
throwError "Failed to elaborate {selector} as a `MVarId → Config → MetaM (Array Suggestion)`."
liftCoreM (registerPremiseSelector selector)
| _ => throwUnsupportedSyntax
open Lean.Elab.Tactic in
@[builtin_tactic Lean.Parser.Tactic.suggestPremises] def evalSuggestPremises : Tactic := fun _ =>
liftMetaTactic1 fun mvarId => do
let suggestions select mvarId
logInfo m!"Premise suggestions: {suggestions.map (·.name)}"
return mvarId
end Lean.PremiseSelection

View File

@@ -0,0 +1,94 @@
/-
Copyright (c) 2025 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Kim Morrison
-/
module
prelude
public import Lean.PremiseSelection.Basic
import Lean.Util.FoldConsts
import Lean.Meta.Basic
/-!
# MePo premise selection
This is a naive implement of the MePo premise selection algorithm, from
"Lightweight relevance filtering for machine-generated resolution problems" by Meng and Paulson.
It needs to be tuned and evaluated for Lean.
-/
open Lean
namespace Lean.PremiseSelection.MePo
def symbolFrequency (env : Environment) : NameMap Nat := Id.run do
let mut map := {}
for (_, ci) in env.constants do
for n' in ci.type.getUsedConstantsAsSet do
map := map.alter n' fun i? => some (i?.getD 0 + 1)
return map
def weightedScore (weight : Name Float) (relevant candidate : NameSet) : Float :=
let S := candidate
let R := relevant S
let R' := S \ R
let M := R.foldl (fun acc n => acc + weight n) 0
M / (M + R'.size.toFloat)
-- This function is taken from the MePo paper and needs to be tuned.
def weightFunction (n : Nat) := 1.0 + 2.0 / (n.log2.toFloat + 1.0)
def frequencyScore (frequency : Name Nat) (relevant candidate : NameSet) : Float :=
weightedScore (fun n => weightFunction (frequency n)) relevant candidate
def unweightedScore (relevant candidate : NameSet) : Float := weightedScore (fun _ => 1) relevant candidate
def mepo (initialRelevant : NameSet) (score : NameSet NameSet Float) (accept : ConstantInfo CoreM Bool) (p : Float) (c : Float) : CoreM (Array (Name × Float)) := do
let env getEnv
let mut p := p
let mut candidates := #[]
let mut relevant := initialRelevant
let mut accepted : Array (Name × Float) := {}
for (n, ci) in env.constants do
if accept ci then
candidates := candidates.push (n, ci.type.getUsedConstantsAsSet)
while candidates.size > 0 do
let (newAccepted, candidates') := candidates.map (fun (n, c) => (n, c, score relevant c)) |>.partition fun (_, _, s) => p s
if newAccepted.isEmpty then return accepted
accepted := newAccepted.foldl (fun acc (n, _, s) => acc.push (n, s)) accepted
candidates := candidates'.map fun (n, c, _) => (n, c)
relevant := newAccepted.foldl (fun acc (_, ns, _) => acc ++ ns) relevant
p := p + (1 - p) / c
return accepted
open Lean Meta MVarId in
def _root_.Lean.MVarId.getConstants (g : MVarId) : MetaM NameSet := withContext g do
let mut c := ( g.getType).getUsedConstantsAsSet
for t in ( getLocalHyps) do
c := c ( inferType t).getUsedConstantsAsSet
return c
end MePo
open MePo
-- The values of p := 0.6 and c := 2.4 are taken from the MePo paper, and need to be tuned.
-- When retrieving ≤32 premises for use by downstream automation, Thomas Zhu suggests that c := 0.5 is optimal.
public def mepoSelector (useRarity : Bool) (p : Float := 0.6) (c : Float := 2.4) : Selector := fun g config => do
let constants g.getConstants
let env getEnv
let score := if useRarity then
let frequency := symbolFrequency env
frequencyScore (frequency.getD · 0)
else
unweightedScore
let accept := fun ci => return !isDeniedPremise env ci.name
let suggestions mepo constants score accept p c
let suggestions := suggestions
|>.map (fun (n, s) => { name := n, score := s })
|>.reverse -- we favor constants that appear at the end of `env.constants`
match config.maxSuggestions with
| none => return suggestions
| some k => return suggestions.take k

View File

@@ -0,0 +1,26 @@
import Lean.PremiseSelection.MePo
set_premise_selector Lean.PremiseSelection.mepoSelector (useRarity := false)
example (a b : Int) : a + b = b + a := by
suggest_premises
sorry
-- #time
example (x y z : List Int) : x ++ y ++ z = x ++ (y ++ z) := by
suggest_premises
sorry
-- `useRarity` is too slow in practice: it requires analyzing all the types in the environment.
-- It would need to be cached.
-- set_premise_selector Lean.PremiseSelection.mepoSelector (useRarity := true)
-- example (a b : Int) : a + b = b + a := by
-- suggest_premises
-- sorry
-- #time
-- example (x y z : List Int) : x ++ y ++ z = x ++ (y ++ z) := by
-- suggest_premises
-- sorry