Compare commits

...

4 Commits

Author SHA1 Message Date
Kim Morrison
2bc5850261 chore: update stage0 2025-10-20 19:45:02 +11:00
Kim Morrison
4d690059e2 refactor 2025-10-20 09:54:04 +02:00
Kim Morrison
152dc08556 symbolFrequencyExt 2025-10-20 05:47:53 +02:00
Kim Morrison
d8af8e485c chore: cleanup of MePo 2025-10-20 05:12:38 +02:00
261 changed files with 57 additions and 42 deletions

View File

@@ -56,7 +56,7 @@ structure Config where
/--
The maximum number of suggestions to return.
-/
maxSuggestions? : Option Nat := none
maxSuggestions : Nat := 100
/--
The tactic that is calling the premise selection, e.g. `simp`, `grind`, or `aesop`.
This may be used to adjust the score of the suggestions
@@ -79,9 +79,6 @@ structure Config where
-/
hint : Option String := none
def Config.maxSuggestions (c : Config) : Nat :=
c.maxSuggestions?.getD 100
abbrev Selector : Type := MVarId Config MetaM (Array Suggestion)
/--
@@ -109,9 +106,7 @@ the `maxSuggestions` field in `Config` is respected, post-hoc.
-/
def maxSuggestions (selector : Selector) : Selector := fun g c => do
let suggestions selector g c
match c.maxSuggestions? with
| none => return suggestions
| some max => return suggestions.take max
return suggestions.take c.maxSuggestions
/-- Combine two premise selectors, returning the best suggestions. -/
def combine (selector₁ : Selector) (selector₂ : Selector) : Selector := fun g c => do
@@ -129,9 +124,7 @@ def combine (selector₁ : Selector) (selector₂ : Selector) : Selector := fun
let deduped := dedupMap.valuesArray
let sorted := deduped.qsort (fun s₁ s₂ => s₁.score > s₂.score)
match c.maxSuggestions? with
| none => return sorted
| some max => return sorted.take max
return sorted.take c.maxSuggestions
end Selector

View File

@@ -22,19 +22,38 @@ open Lean
namespace Lean.PremiseSelection.MePo
def symbolFrequency (env : Environment) : NameMap Nat := Id.run do
let mut map := {}
for (_, ci) in env.constants do
for n' in ci.type.getUsedConstantsAsSet do
map := map.alter n' fun i? => some (i?.getD 0 + 1)
return map
builtin_initialize registerTraceClass `mepo
local instance : Zero (NameMap Nat) :=
local instance : Add (NameMap Nat) where
add x y := y.foldl (init := x) fun x' n c => x'.insert n (x'.getD n 0 + c)
public builtin_initialize symbolFrequencyExt : PersistentEnvExtension (NameMap Nat) Empty (NameMap Nat)
registerPersistentEnvExtension {
name := `symbolFrequency
mkInitial := pure
addImportedFn := fun mapss _ => pure <|
mapss.foldl (init := 0) fun acc maps => maps.foldl (init := acc) fun acc map => acc + map
addEntryFn := nofun
exportEntriesFnEx := fun env _ _ =>
let r := env.constants.map₂.foldl (init := ( : NameMap Nat)) (fun acc n ci =>
if n.isInternalDetail then
acc
else
ci.type.foldConsts (init := acc) fun n' acc => acc.alter n' fun i? => some (i?.getD 0 + 1))
#[r]
statsFn := fun _ => "symbol frequency extension"
}
public def symbolFrequency (env : Environment) (n : Name) : Nat :=
symbolFrequencyExt.getState env |>.getD n 0
def weightedScore (weight : Name Float) (relevant candidate : NameSet) : Float :=
let S := candidate
let R := relevant S
let R' := S \ R
let R' := (S \ R).size.toFloat
let M := R.foldl (fun acc n => acc + weight n) 0
M / (M + R'.size.toFloat)
M / (M + R')
-- This function is taken from the MePo paper and needs to be tuned.
def weightFunction (n : Nat) := 1.0 + 2.0 / (n.log2.toFloat + 1.0)
@@ -44,23 +63,29 @@ def frequencyScore (frequency : Name → Nat) (relevant candidate : NameSet) : F
def unweightedScore (relevant candidate : NameSet) : Float := weightedScore (fun _ => 1) relevant candidate
def mepo (initialRelevant : NameSet) (score : NameSet NameSet Float) (accept : ConstantInfo CoreM Bool) (p : Float) (c : Float) : CoreM (Array (Name × Float)) := do
def mepo (initialRelevant : NameSet) (score : NameSet NameSet Float) (accept : ConstantInfo CoreM Bool)
(maxSuggestions : Nat) (p : Float) (c : Float) : CoreM (Array Suggestion) := do
let env getEnv
let mut p := p
let mut candidates := #[]
let mut relevant := initialRelevant
let mut accepted : Array (Name × Float) := {}
let mut accepted : Array Suggestion := {}
for (n, ci) in env.constants do
if accept ci then
candidates := candidates.push (n, ci.type.getUsedConstantsAsSet)
while candidates.size > 0 do
let (newAccepted, candidates') := candidates.map (fun (n, c) => (n, c, score relevant c)) |>.partition fun (_, _, s) => p s
while candidates.size > 0 && accepted.size < maxSuggestions do
trace[mepo] m!"Considering candidates with threshold {p}."
trace[mepo] m!"Current relevant set: {relevant.toList}."
let (newAccepted, candidates') := candidates.map
(fun (n, c) => (n, c, score relevant c))
|>.partition fun (_, _, s) => p s
if newAccepted.isEmpty then return accepted
accepted := newAccepted.foldl (fun acc (n, _, s) => acc.push (n, s)) accepted
trace[mepo] m!"Accepted {newAccepted.map fun (n, _, s) => (n, s)}."
accepted := newAccepted.foldl (fun acc (n, _, s) => acc.push { name := n, score := s }) accepted
candidates := candidates'.map fun (n, c, _) => (n, c)
relevant := newAccepted.foldl (fun acc (_, ns, _) => acc ++ ns) relevant
p := p + (1 - p) / c
return accepted
return accepted.qsort (fun a b => a.score > b.score)
open Lean Meta MVarId in
def _root_.Lean.MVarId.getConstants (g : MVarId) : MetaM NameSet := withContext g do
@@ -74,20 +99,15 @@ end MePo
open MePo
-- The values of p := 0.6 and c := 2.4 are taken from the MePo paper, and need to be tuned.
-- When retrieving ≤32 premises for use by downstream automation, Thomas Zhu suggests that c := 0.5 is optimal.
public def mepoSelector (useRarity : Bool) (p : Float := 0.6) (c : Float := 2.4) : Selector := fun g config => do
let constants g.getConstants
let env getEnv
let score := if useRarity then
let frequency := symbolFrequency env
frequencyScore (frequency.getD · 0)
frequencyScore (symbolFrequency env)
else
unweightedScore
let accept := fun ci => return !isDeniedPremise env ci.name
let suggestions mepo constants score accept p c
let suggestions mepo constants score accept config.maxSuggestions p c
let suggestions := suggestions
|>.map (fun (n, s) => { name := n, score := s })
|>.reverse -- we favor constants that appear at the end of `env.constants`
match config.maxSuggestions? with
| none => return suggestions
| some k => return suggestions.take k
return suggestions.take config.maxSuggestions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More