test: add VCGen test suite for sym mvcgen benchmarks (#12855)

This PR extracts the example programs from the sym mvcgen benchmarks
into
shared `Cases.*` modules so that both benchmarks and a new fast test
suite
can reuse them. It also renames `vcgen_deep_add_sub_cancel` to
`vcgen_add_sub_cancel_deep` for consistency.

The test suite (`test_vcgen.lean`) runs all cases at n=10, completing in
~2s vs minutes for the full benchmarks. It is wired up as a `lake test`
driver and integrated with the lean4 test/bench infrastructure via
`run_test`/`run_bench` scripts registered in `CMakeLists.txt`.

Benchmark output now uses aligned `CaseName(n):` labels. The `run_bench`
script extracts per-case vcgen and kernel timings into
`measurements.jsonl`.
Benchmarks run single-threaded (`LEAN_NUM_THREADS=1`) for
reproducibility.
`vcgen_get_throw_set` is excluded from benchmarks due to pathological
`instantiateMVars` behavior.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Sebastian Graf
2026-03-10 21:32:13 +08:00
committed by GitHub
parent e9060e7a4e
commit 49ed556479
18 changed files with 320 additions and 200 deletions

View File

@@ -287,6 +287,7 @@ add_test_pile(server_interactive *.lean)
add_test_dir(../doc/examples/compiler)
add_test_dir(bench/build BENCH PART1)
add_test_dir(bench/mvcgen/sym BENCH PART2)
add_test_dir(bench/size BENCH PART1)
add_test_dir(lake_bench/inundation BENCH PART2)

View File

@@ -0,0 +1,5 @@
import Cases.AddSubCancel
import Cases.AddSubCancelDeep
import Cases.GetThrowSet
import Cases.PurePrecond
import Cases.ReaderState

View File

@@ -0,0 +1,37 @@
import Lean
import VCGen
open Lean Meta Elab Tactic Sym Std Do SpecAttr
namespace AddSubCancel
set_option mvcgen.warning false
-- The following specs partially evaluate the specs for `get` and `set` that otherwise would need
-- multiple small substeps in the modular lifting framework. This is good practice for performance
-- sensitive use cases.
@[spec high]
theorem Spec.MonadState_get {m ps} [Monad m] [WPMonad m ps] {σ} {Q : PostCond σ (.arg σ ps)} :
fun s => Q.fst s s get (m := StateT σ m) Q := by
mvcgen
@[spec high]
theorem Spec.MonadStateOf_set {m ps} [Monad m] [WPMonad m ps] {σ} {Q : PostCond PUnit (.arg σ ps)} {s : σ} :
fun _ => Q.fst s set (m := StateT σ m) s Q := by
mvcgen
def step (v : Nat) : StateM Nat Unit := do
let s get
set (s + v)
let s get
set (s - v)
def loop (n : Nat) : StateM Nat Unit := do
match n with
| 0 => pure ()
| n+1 => step n; loop n
def Goal (n : Nat) : Prop := post, post loop n _ => post
end AddSubCancel

View File

@@ -0,0 +1,43 @@
import Lean
import VCGen
open Lean Meta Elab Tactic Sym Std Do SpecAttr
namespace AddSubCancelDeep
set_option mvcgen.warning false
/-!
Same case as `AddSubCancel` but using a deep transformer stack.
-/
abbrev M := ExceptT String <| ReaderT String <| ExceptT Nat <| StateT Nat <| ExceptT Unit <| StateM Unit
-- The following specs partially evaluate the specs for `getThe` and `set` that otherwise would need
-- multiple small substeps in the modular lifting framework. This is good practice for performance
-- sensitive use cases.
@[spec high]
theorem Spec.M_getThe_Nat :
fun s₁ s₂ => Q.fst s₂ s₁ s₂ getThe (m := M) Nat Q := by
mvcgen
@[spec high]
theorem Spec.M_set_Nat (n : Nat) :
fun s₁ _ => Q.fst s₁ n set (m := M) n Q := by
mvcgen
def step (v : Nat) : M Unit := do
let s getThe Nat
set (s + v)
let s getThe Nat
set (s - v)
def loop (n : Nat) : M Unit := do
match n with
| 0 => pure ()
| n+1 => step n; loop n
def Goal (n : Nat) : Prop := post, post loop n _ => post
end AddSubCancelDeep

View File

@@ -0,0 +1,42 @@
import Lean
import VCGen
open Lean Meta Elab Tactic Sym Std Do SpecAttr
namespace GetThrowSet
set_option mvcgen.warning false
abbrev M := ExceptT String <| StateM Nat
-- Partially evaluated specs for best performance.
@[spec high]
theorem Spec.throw_M {e : String} :
Q.2.1 e throw (m := M) e Q := by
mvcgen
@[spec high]
theorem Spec.set_M {s : Nat} :
fun _ => Q.1 s set (m := M) s Q := by
mvcgen
@[spec high]
theorem Spec.get_M :
fun s => Q.1 s s get (m := M) Q := by
mvcgen
def step (lim : Nat) : M Unit := do
let s get
if s > lim then
throw "s is too large"
set (s + 1)
def loop (n : Nat) : M Unit := do
match n with
| 0 => pure ()
| n+1 => loop n; step n
def Goal (n : Nat) : Prop := fun s => s = 0 loop n _ s => s = n
end GetThrowSet

View File

@@ -0,0 +1,33 @@
import Lean
import VCGen
open Lean Meta Elab Tactic Sym Std Do SpecAttr
namespace PurePrecond
set_option mvcgen.warning false
def flipp (_ : Bool) : StateM Bool Unit := modify not
theorem Spec.flipp_false :
fun b => b = false flipp false _ b => b = true := by
mvcgen [flipp] <;> grind
theorem Spec.flipp_true :
fun b => b = true flipp true _ b => b = false := by
mvcgen [flipp] <;> grind
attribute [spec] Spec.flipp_true Spec.flipp_false
def step : StateM Bool Unit := do
flipp true
flipp false
def loop (n : Nat) : StateM Bool Unit := do
match n with
| 0 => pure ()
| n+1 => step; loop n
def Goal (n : Nat) : Prop := fun b => b = true loop n _ b => b = true
end PurePrecond

View File

@@ -0,0 +1,43 @@
import Lean
import VCGen
open Lean Meta Elab Tactic Sym Std Do SpecAttr
namespace ReaderState
set_option mvcgen.warning false
abbrev M := ReaderT Nat <| StateM Nat
-- Partially evaluated specs for best performance.
@[spec high]
theorem Spec.M_read :
fun r s => Q.fst r r s read (m := M) Q := by
mvcgen
@[spec high]
theorem Spec.M_get :
fun r s => Q.fst s r s get (m := M) Q := by
mvcgen
@[spec high]
theorem Spec.M_set (n : Nat) :
fun r _ => Q.fst () r n set (m := M) n Q := by
mvcgen
def step : M Unit := do
let r read
let s get
set (s + r)
let s get
set (s - r)
def loop (n : Nat) : M Unit := do
match n with
| 0 => pure ()
| n+1 => step; loop n
def Goal (n : Nat) : Prop := post, post loop n _ => post
end ReaderState

View File

@@ -13,13 +13,20 @@ lean_lib Baseline where
lean_lib Driver where
srcDir := "lib"
lean_lib Cases where
srcDir := "cases"
@[default_target]
lean_lib VCGenBench where
roots := #[`vcgen_add_sub_cancel, `vcgen_deep_add_sub_cancel, `vcgen_get_throw_set,
`vcgen_pure_precond, `vcgen_reader_state]
roots := #[`vcgen_add_sub_cancel, `vcgen_add_sub_cancel_deep,
`vcgen_get_throw_set, `vcgen_pure_precond, `vcgen_reader_state]
moreLeanArgs := #["--tstack=100000000"]
@[default_target]
lean_lib BaselineBench where
roots := #[`baseline_add_sub_cancel]
moreLeanArgs := #["--tstack=100000000"]
@[test_driver]
lean_lib VCGenTest where
roots := #[`test_vcgen]

View File

@@ -45,7 +45,9 @@ def driver (goal : Name) (unfold : List Name) (n : Nat) (discharge : MetaM (TSyn
-- If we don't do this, kernel checking time balloons.
let expr SymM.run (shareCommon expr)
let (_, kernelMs) timeItMs (checkWithKernel expr)
let mut msg := s!"goal_{n}: {ms} ms"
let label := s!"{goal.getPrefix}({n}):"
let pad := "".pushn ' ' (24 - min label.length 24)
let mut msg := s!"{label}{pad}{ms} ms"
if let some dischargeMs := dischargeMs? then
msg := msg ++ s!", {mvarIds.length} VCs by {dischargePp}: {dischargeMs} ms"
else

View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
source ../../../env_bench.sh
source "$TEST_DIR/util.sh"
rm -f measurements.jsonl
# Build dependencies first so their compilation isn't measured.
lake build Cases Driver Baseline
# Run benchmarks single-threaded for reproducible measurements.
LEAN_NUM_THREADS=1 lake build VCGenBench 2>&1 | tee vcgen.out
LEAN_NUM_THREADS=1 lake build BaselineBench 2>&1 | tee -a vcgen.out
# Parse lines like:
# AddSubCancel(1000): 528 ms, 1 VCs by grind: 245 ms, kernel: 446 ms
# into JSONL measurements.
python3 -c "
import json, re, sys
for line in open('vcgen.out'):
m = re.search(r'(\w+)\((\d+)\):\s+(\d+) ms.*kernel:\s+(\d+) ms', line)
if not m:
continue
case, n, vcgen_ms, kernel_ms = m.group(1), m.group(2), m.group(3), m.group(4)
for phase, val in [('vcgen', vcgen_ms), ('kernel', kernel_ms)]:
print(json.dumps({
'metric': f'mvcgen/sym/{case}/{n}/{phase}//wall-clock',
'value': int(val) / 1000,
'unit': 's'
}))
" >> measurements.jsonl
rm -f vcgen.out

View File

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
source ../../../env_test.sh
source "$TEST_DIR/util.sh"
# Limit threads to avoid exhausting memory with the large thread stack.
LEAN_NUM_THREADS=1 lake test

View File

@@ -0,0 +1,40 @@
/-
Copyright (c) 2026 Lean FRO LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Sebastian Graf
-/
import Cases
import Driver
/-!
# VCGen Test Suite
Runs all VCGen test cases at small sizes (n=10) to verify correctness.
Each case exercises a different aspect of the VC generation:
- `AddSubCancel`: Basic add/sub loop in `StateM`
- `AddSubCancelDeep`: Same loop through a deep monad transformer stack
- `GetThrowSet`: Exception handling with `ExceptT`/`StateM`
- `PurePrecond`: Pure hypotheses `⌜φ⌝` in preconditions
- `ReaderState`: `ReaderT`/`StateM` combination
-/
open Lean Parser Meta Elab Tactic Sym Std Do SpecAttr
set_option maxRecDepth 10000
set_option maxHeartbeats 10000000
open AddSubCancel in
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen') `(tactic| grind) [10]
open AddSubCancelDeep in
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen') `(tactic| grind) [10]
open GetThrowSet in
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen') `(tactic| sorry) [10]
open PurePrecond in
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen') `(tactic| fail) [10]
open ReaderState in
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen') `(tactic| sorry) [10]

View File

@@ -3,35 +3,11 @@ Copyright (c) 2026 Lean FRO LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Sebastian Graf
-/
import Lean
import VCGen
import Cases.AddSubCancel
import Driver
open Lean Parser Meta Elab Tactic Sym Std Do SpecAttr
-- The following spec is necessary because the VC gen currently has no support for unfolding spec
-- theorems, which is what we usually do for `MonadState.get`.
@[spec]
theorem Spec.MonadState_get {m ps} [Monad m] [WPMonad m ps] {σ} {Q : PostCond σ (.arg σ ps)} :
fun s => Q.fst s s get (m := StateT σ m) Q := by
simp only [Triple, WP.get_MonadState, WP.get_StateT, SPred.entails.refl]
/-!
Same benchmark as `shallow_add_sub_cancel` but using `mvcgen`.
-/
def step (v : Nat) : StateM Nat Unit := do
let s get
set (s + v)
let s get
set (s - v)
def loop (n : Nat) : StateM Nat Unit := do
match n with
| 0 => pure ()
| n+1 => step n; loop n
def Goal (n : Nat) : Prop := post, post loop n _ => post
open AddSubCancel
set_option maxRecDepth 10000
set_option maxHeartbeats 10000000

View File

@@ -0,0 +1,17 @@
/-
Copyright (c) 2026 Lean FRO LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Sebastian Graf
-/
import Cases.AddSubCancelDeep
import Driver
open Lean Parser Meta Elab Tactic Sym Std Do SpecAttr
open AddSubCancelDeep
set_option maxRecDepth 10000
set_option maxHeartbeats 10000000
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen') `(tactic| grind)
[100, 500, 1000]
-- [1000]

View File

@@ -1,56 +0,0 @@
/-
Copyright (c) 2026 Lean FRO LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Sebastian Graf
-/
import Lean
import VCGen
import Driver
open Lean Parser Meta Elab Tactic Sym Std Do SpecAttr
set_option mvcgen.warning false
/-!
Same benchmark as `vcgen_add_sub_cancel` but using a deep transformer stack.
-/
abbrev M := ExceptT String <| ReaderT String <| ExceptT Nat <| StateT Nat <| ExceptT Unit <| StateM Unit
/-
Known issues:
* Using `StateT String` instead of `ReaderT String` picks the wrong spec for `MonadStateOf.get`; namely that on `String`.
It seems we need to disambiguate discrimination tree lookup results with defeq.
* Even using `ReaderT String` it doesn't work. TODO: Why?
* But just using `ExceptT String` works.
-/
def step (v : Nat) : M Unit := do
let s getThe Nat
set (s + v)
let s getThe Nat
set (s - v)
def loop (n : Nat) : M Unit := do
match n with
| 0 => pure ()
| n+1 => step n; loop n
def Goal (n : Nat) : Prop := post, post loop n _ => post
@[spec high]
theorem Spec.M_getThe_Nat :
fun s₁ s₂ => Q.fst s₂ s₁ s₂ getThe (m := M) Nat Q := by
mvcgen
@[spec high]
theorem Spec.M_set_Nat (n : Nat) :
fun s₁ _ => Q.fst s₁ n set (m := M) n Q := by
mvcgen
set_option maxRecDepth 10000
set_option maxHeartbeats 10000000
#eval runBenchUsingTactic ``Goal [``loop, ``step] `(tactic| mvcgen') `(tactic| grind)
[100, 500, 1000]
-- [1000]

View File

@@ -3,46 +3,11 @@ Copyright (c) 2026 Lean FRO LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Sebastian Graf
-/
import Lean
import VCGen
import Cases.GetThrowSet
import Driver
open Lean Parser Meta Elab Tactic Sym Std Do SpecAttr
set_option mvcgen.warning false
abbrev M := ExceptT String <| StateM Nat
-- We partially evaluate a couple of specs for best performance.
-- If you comment those out, you will see a 4x slowdown.
@[spec high]
theorem Spec.throw_M {e : String} :
Q.2.1 e throw (m := M) e Q := by
mvcgen
@[spec high]
theorem Spec.set_M {s : Nat} :
fun _ => Q.1 s set (m := M) s Q := by
mvcgen
@[spec high]
theorem Spec.get_M :
fun s => Q.1 s s get (m := M) Q := by
mvcgen
def step (lim : Nat) : M Unit := do
let s get
if s > lim then
throw "s is too large"
set (s + 1)
def loop (n : Nat) : M Unit := do
match n with
| 0 => pure ()
| n+1 => loop n; step n
def Goal (n : Nat) : Prop := fun s => s = 0 loop n _ s => s = n
open GetThrowSet
set_option maxRecDepth 10000
set_option maxHeartbeats 10000000

View File

@@ -3,46 +3,11 @@ Copyright (c) 2026 Lean FRO LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Sebastian Graf
-/
import Lean
import VCGen
import Cases.PurePrecond
import Driver
open Lean Parser Meta Elab Tactic Sym Std Do SpecAttr
set_option mvcgen.warning false
/-!
A benchmark where VCs carry pure hypotheses `⌜φ⌝` on the left of `⊢ₛ`.
The precondition `⌜s = 0⌝` propagates through VCGen, producing VCs
of the form `⌜s₀ = 0⌝ ⊢ₛ ⌜ψ⌝`. Discharging these VCs requires
moving the pure hypothesis into the Lean local context
(e.g., via `SPred.pure_mono` or `Frame.frame`).
-/
-- Partially evaluated specs for best performance.
def flipp (_ : Bool) : StateM Bool Unit := modify not
theorem Spec.flipp_false :
fun b => b = false flipp false _ b => b = true := by
mvcgen [flipp] <;> grind
theorem Spec.flipp_true :
fun b => b = true flipp true _ b => b = false := by
mvcgen [flipp] <;> grind
attribute [spec] Spec.flipp_true Spec.flipp_false
def step : StateM Bool Unit := do
flipp true
flipp false
def loop (n : Nat) : StateM Bool Unit := do
match n with
| 0 => pure ()
| n+1 => step; loop n
def Goal (n : Nat) : Prop := fun b => b = true loop n _ b => b = true
open PurePrecond
set_option maxRecDepth 10000
set_option maxHeartbeats 10000000

View File

@@ -3,50 +3,11 @@ Copyright (c) 2026 Lean FRO LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Sebastian Graf
-/
import Lean
import VCGen
import Cases.ReaderState
import Driver
open Lean Parser Meta Elab Tactic Sym Std Do SpecAttr
set_option mvcgen.warning false
/-!
A variant of the `vcgen_add_sub_cancel` benchmark, but reading the value to subtract from `ReaderT`.
-/
abbrev M := ReaderT Nat <| StateM Nat
-- Partially evaluated specs for best performance.
@[spec high]
theorem Spec.M_read :
fun r s => Q.fst r r s read (m := M) Q := by
mvcgen
@[spec high]
theorem Spec.M_get :
fun r s => Q.fst s r s get (m := M) Q := by
mvcgen
@[spec high]
theorem Spec.M_set (n : Nat) :
fun r _ => Q.fst () r n set (m := M) n Q := by
mvcgen
def step : M Unit := do
let r read
let s get
set (s + r)
let s get
set (s - r)
def loop (n : Nat) : M Unit := do
match n with
| 0 => pure ()
| n+1 => step; loop n
def Goal (n : Nat) : Prop := post, post loop n _ => post
open ReaderState
set_option maxRecDepth 10000
set_option maxHeartbeats 10000000