perf: separate meta and non-meta initializers (#12016)

This PR enables the module system, in cooperation with the linker, to
separate meta and non-meta code in native binaries. In particular, this
ensures tactics merely used in proofs do not make it into the final
binary. A simple example using `meta import Lean` has its binary size
reduced from 130MB to 1.7MB.

# Breaking change

`importModules (loadExts := true)` must now be preceded by
`enableInitializersExecution`. This was always the case for correct
importing but is now enforced and checked eagerly.
This commit is contained in:
Sebastian Ullrich
2026-02-26 09:05:19 +01:00
committed by GitHub
parent 65a0c61806
commit b1a991eee0
11 changed files with 138 additions and 83 deletions

View File

@@ -39,9 +39,9 @@ abbrev M := ReaderT Context (EStateM String String)
@[inline] def getModName : M Name := Context.modName <$> read
@[inline] def getModInitFn : M String := do
@[inline] def getModInitFn (phases : IRPhases) : M String := do
let pkg? := ( getEnv).getModulePackage?
return mkModuleInitializationFunctionName ( getModName) pkg?
return mkModuleInitializationFunctionName (phases := phases) ( getModName) pkg?
def getDecl (n : Name) : M Decl := do
let env getEnv
@@ -343,7 +343,7 @@ def emitMainFn : M Unit := do
/- We disable panic messages because they do not mesh well with extracted closed terms.
See issue #534. We can remove this workaround after we implement issue #467. -/
emitLn "lean_set_panic_messages(false);"
emitLn s!"res = {← getModInitFn}(1 /* builtin */);"
emitLn s!"res = {← getModInitFn (phases := if env.header.isModule then .runtime else .all)}(1 /* builtin */);"
emitLn "lean_set_panic_messages(true);"
emitLns ["lean_io_mark_end_initialization();",
"if (lean_io_result_is_ok(res)) {",
@@ -887,24 +887,21 @@ def emitMarkPersistent (d : Decl) (n : Name) : M Unit := do
emitCName n
emitLn ");"
def emitDeclInit (d : Decl) : M Unit := do
def withErrRet (emitIORes : M Unit) : M Unit := do
emit "res = "; emitIORes; emitLn ";"
emitLn "if (lean_io_result_is_error(res)) return res;"
def emitDeclInit (d : Decl) (isBuiltin : Bool) : M Unit := do
let env getEnv
let n := d.name
if isIOUnitInitFn env n then
if isIOUnitBuiltinInitFn env n then
emit "if (builtin) {"
emit "res = "; emitCName n; emitLn "();"
emitLn "if (lean_io_result_is_error(res)) return res;"
if (isBuiltin && isIOUnitBuiltinInitFn env n) || isIOUnitInitFn env n then
withErrRet do
emitCName n; emitLn "()"
emitLn "lean_dec_ref(res);"
if isIOUnitBuiltinInitFn env n then
emit "}"
else if d.params.size == 0 then
match getInitFnNameFor? env d.name with
| some initFn =>
if getBuiltinInitFnNameFor? env d.name |>.isSome then
emit "if (builtin) {"
emit "res = "; emitCName initFn; emitLn "();"
emitLn "if (lean_io_result_is_error(res)) return res;"
if let some initFn := (guard isBuiltin *> getBuiltinInitFnNameFor? env d.name) <|> getInitFnNameFor? env d.name then
withErrRet do
emitCName initFn; emitLn "()"
emitCName n
if d.resultType.isScalar then
emitLn (" = " ++ getUnboxOpName d.resultType ++ "(lean_io_result_get_value(res));")
@@ -912,41 +909,78 @@ def emitDeclInit (d : Decl) : M Unit := do
emitLn " = lean_io_result_get_value(res);"
emitMarkPersistent d n
emitLn "lean_dec_ref(res);"
if getBuiltinInitFnNameFor? env d.name |>.isSome then
emit "}"
| _ =>
if !isClosedTermName env d.name && !isSimpleGroundDecl env d.name then
emitCName n; emit " = "; emitCInitName n; emitLn "();"; emitMarkPersistent d n
else if !isClosedTermName env d.name && !isSimpleGroundDecl env d.name then
emitCName n; emit " = "; emitCInitName n; emitLn "();"; emitMarkPersistent d n
def emitInitFn : M Unit := do
def emitInitFn (phases : IRPhases) : M Unit := do
let env getEnv
let impInitFns env.imports.mapM fun imp => do
let impInitFns env.imports.filterMapM fun imp => do
if phases != .all && imp.isMeta != (phases == .comptime) then
return none
let some idx := env.getModuleIdx? imp.module
| throw "(internal) import without module index" -- should be unreachable
let pkg? := env.getModulePackageByIdx? idx
let fn := mkModuleInitializationFunctionName (phases := if phases == .all then .all else if imp.isMeta then .runtime else phases) imp.module pkg?
emitLn s!"lean_object* {fn}(uint8_t builtin);"
return some fn
let initialized := s!"_G_{mkModuleInitializationPrefix phases}initialized"
emitLns [
s!"static bool {initialized} = false;",
s!"LEAN_EXPORT lean_object* {← getModInitFn (phases := phases)}(uint8_t builtin) \{",
"lean_object * res;",
s!"if ({initialized}) return lean_io_result_mk_ok(lean_box(0));",
s!"{initialized} = true;"
]
impInitFns.forM fun fn => do
withErrRet do
emitLn s!"{fn}(builtin)"
emitLn "lean_dec_ref(res);"
let decls := getDecls env
for d in decls.reverse do
if phases == .all || (phases == .comptime) == isMarkedMeta env d.name then
emitDeclInit d (isBuiltin := phases != .comptime)
emitLns ["return lean_io_result_mk_ok(lean_box(0));", "}"]
/-- Init function used before phase split under module system, keep for compatibility. -/
def emitLegacyInitFn : M Unit := do
let env getEnv
let impInitFns env.imports.filterMapM fun imp => do
let some idx := env.getModuleIdx? imp.module
| throw "(internal) import without module index" -- should be unreachable
let pkg? := env.getModulePackageByIdx? idx
let fn := mkModuleInitializationFunctionName imp.module pkg?
emitLn s!"lean_object* {fn}(uint8_t builtin);"
return fn
return some fn
let initialized := s!"_G_initialized"
emitLns [
"static bool _G_initialized = false;",
s!"LEAN_EXPORT lean_object* {← getModInitFn}(uint8_t builtin) \{",
s!"static bool {initialized} = false;",
s!"LEAN_EXPORT lean_object* {← getModInitFn (phases := .all)}(uint8_t builtin) \{",
"lean_object * res;",
"if (_G_initialized) return lean_io_result_mk_ok(lean_box(0));",
"_G_initialized = true;"
s!"if ({initialized}) return lean_io_result_mk_ok(lean_box(0));",
s!"{initialized} = true;"
]
impInitFns.forM fun fn => emitLns [
s!"res = {fn}(builtin);",
"if (lean_io_result_is_error(res)) return res;",
"lean_dec_ref(res);"]
let decls := getDecls env
decls.reverse.forM emitDeclInit
emitLns ["return lean_io_result_mk_ok(lean_box(0));", "}"]
impInitFns.forM fun fn => do
withErrRet do
emitLn s!"{fn}(builtin)"
emitLn "lean_dec_ref(res);"
withErrRet do
emitLn s!"{← getModInitFn (phases := .runtime)}(builtin)"
emitLn "lean_dec_ref(res);"
withErrRet do
emitLn s!"{← getModInitFn (phases := .comptime)}(builtin)"
emitLn "lean_dec_ref(res);"
emitLns [s!"return {← getModInitFn (phases := .all)}(builtin);", "}"]
def main : M Unit := do
emitFileHeader
emitFnDecls
emitFns
emitInitFn
if ( getEnv).header.isModule then
emitInitFn (phases := .runtime)
emitInitFn (phases := .comptime)
emitLegacyInitFn
else
emitInitFn (phases := .all)
emitMainFnIfNeeded
emitFileFooter

View File

@@ -21,7 +21,7 @@ def isTailCallTo (g : Name) (b : FnBody) : Bool :=
| _ => false
def usesModuleFrom (env : Environment) (modulePrefix : Name) : Bool :=
env.allImportedModuleNames.toList.any fun modName => modulePrefix.isPrefixOf modName
env.header.modules.any fun mod => mod.irPhases != .comptime && modulePrefix.isPrefixOf mod.module
namespace CollectUsedDecls

View File

@@ -37,8 +37,8 @@ Run the initializer of the given module (without `builtin_initialize` commands).
Return `false` if the initializer is not available as native code.
Initializers do not have corresponding Lean definitions, so they cannot be interpreted in this case.
-/
@[inline] private unsafe def runModInit (mod : Name) (pkg? : Option String) : IO Bool :=
runModInitCore (mkModuleInitializationFunctionName mod pkg?)
@[inline] private unsafe def runModInit (mod : Name) (pkg? : Option String) (phases : IRPhases) : IO Bool :=
runModInitCore (mkModuleInitializationFunctionName mod pkg? phases)
/-- Run the initializer for `decl` and store its value for global access. Should only be used while importing. -/
@[extern "lean_run_init"]
@@ -160,37 +160,46 @@ def declareBuiltin (forDecl : Name) (value : Expr) : CoreM Unit :=
@[export lean_run_init_attrs]
private unsafe def runInitAttrs (env : Environment) (opts : Options) : IO Unit := do
if ( isInitializerExecutionEnabled) then
-- **Note**: `ModuleIdx` is not an abbreviation, and we don't have instances for it.
-- Thus, we use `(modIdx : Nat)`
for mod in env.header.moduleNames, (modIdx : Nat) in 0...* do
-- any native Lean code reachable by the interpreter (i.e. from shared
-- libraries with their corresponding module in the Environment) must
-- first be initialized
let pkg? := env.getModulePackageByIdx? modIdx
if ( runModInit mod pkg?) then
if !( isInitializerExecutionEnabled) then
throw <| IO.userError "`enableInitializerExecution` must be run before calling `importModules (loadExts := true)`"
-- **Note**: `ModuleIdx` is not an abbreviation, and we don't have instances for it.
-- Thus, we use `(modIdx : Nat)`
for mod in env.header.modules, (modIdx : Nat) in 0...* do
let initRuntime := Elab.inServer.get opts || mod.irPhases != .runtime
-- any native Lean code reachable by the interpreter (i.e. from shared
-- libraries with their corresponding module in the Environment) must
-- first be initialized
let pkg? := env.getModulePackageByIdx? modIdx
if env.header.isModule && /- TODO: remove after reboostrap -/ false then
let initializedRuntime pure initRuntime <&&> runModInit (phases := .runtime) mod.module pkg?
let initializedComptime runModInit (phases := .comptime) mod.module pkg?
if initializedRuntime || initializedComptime then
continue
-- As `[init]` decls can have global side effects, ensure we run them at most once,
-- just like the compiled code does.
if ( interpretedModInits.get).contains mod then
else
if ( runModInit (phases := .all) mod.module pkg?) then
continue
interpretedModInits.modify (·.insert mod)
let modEntries := regularInitAttr.ext.getModuleEntries env modIdx
-- `getModuleIREntries` is identical to `getModuleEntries` if we loaded only one of
-- .olean (from `meta initialize`)/.ir (`initialize` via transitive `meta import`)
-- so deduplicate (these lists should be very short).
-- If we have both, we should not need to worry about their relative ordering as `meta` and
-- non-`meta` initialize should not have interdependencies.
let modEntries := modEntries ++ (regularInitAttr.ext.getModuleIREntries env modIdx).filter (!modEntries.contains ·)
for (decl, initDecl) in modEntries do
-- Skip initializers we do not have IR for; they should not be reachable by interpretation.
if !Elab.inServer.get opts && getIRPhases env decl == .runtime then
continue
if initDecl.isAnonymous then
-- As `[init]` decls can have global side effects, ensure we run them at most once,
-- just like the compiled code does.
if ( interpretedModInits.get).contains mod.module then
continue
interpretedModInits.modify (·.insert mod.module)
let modEntries := regularInitAttr.ext.getModuleEntries env modIdx
-- `getModuleIREntries` is identical to `getModuleEntries` if we loaded only one of
-- .olean (from `meta initialize`)/.ir (`initialize` via transitive `meta import`)
-- so deduplicate (these lists should be very short).
-- If we have both, we should not need to worry about their relative ordering as `meta` and
-- non-`meta` initialize should not have interdependencies.
let modEntries := modEntries ++ (regularInitAttr.ext.getModuleIREntries env modIdx).filter (!modEntries.contains ·)
for (decl, initDecl) in modEntries do
if !initRuntime && getIRPhases env decl == .runtime then
continue
if initDecl.isAnonymous then
-- Don't check `meta` again as it would not respect `Elab.inServer`
let initFn IO.ofExcept <| env.evalConst (checkMeta := false) (IO Unit) opts decl
initFn
else
runInit env opts decl initDecl
let initFn IO.ofExcept <| env.evalConst (checkMeta := false) (IO Unit) opts decl
initFn
else
runInit env opts decl initDecl
end Lean

View File

@@ -157,8 +157,14 @@ public def mkModuleInitializationStem (moduleName : Name) (pkg? : Option PkgId :
let pre := pkg?.elim "" (s!"{·.mangle}_")
moduleName.mangle pre
public def mkModuleInitializationFunctionName (moduleName : Name) (pkg? : Option PkgId := none) : String :=
"initialize_" ++ mkModuleInitializationStem moduleName pkg?
public def mkModuleInitializationPrefix (phases : IRPhases) : String :=
match phases with
| .comptime => "meta_"
| .runtime => "runtime_"
| .all => ""
public def mkModuleInitializationFunctionName (moduleName : Name) (pkg? : Option PkgId := none) (phases : IRPhases := .all) : String :=
mkModuleInitializationPrefix phases ++ "initialize_" ++ mkModuleInitializationStem moduleName pkg?
public def mkPackageSymbolPrefix (pkg? : Option PkgId) : String :=
pkg?.elim "l_" (s!"lp_{·.mangle}_")

View File

@@ -142,16 +142,6 @@ structure ModuleData where
entries : Array (Name × Array EnvExtensionEntry)
deriving Inhabited
/-- Phases for which some IR is available for execution. -/
inductive IRPhases where
/-- Available for execution in the final native code. -/
| runtime
/-- Available for execution during elaboration. -/
| comptime
/-- Available during run time and compile time. -/
| all
deriving Inhabited, BEq, Repr
/-- Import including information resulting from processing of the entire import DAG. -/
structure EffectiveImport extends Import where
/-- Phases for which the import's IR is available. -/

View File

@@ -44,6 +44,16 @@ instance : ToString Import := ⟨fun imp =>
s!"{if imp.isExported then "public " else ""}{if imp.isMeta then "meta " else ""}import \
{if imp.importAll then "all " else ""}{imp.module}"
/-- Phases for which some IR is available for execution. -/
inductive IRPhases where
/-- Available for execution in the final native code. -/
| runtime
/-- Available for execution during elaboration. -/
| comptime
/-- Available during run time and compile time. -/
| all
deriving Inhabited, BEq, Repr
/-- Abstract structure of a module's header. -/
structure ModuleHeader where
/-- The module's direct imports (i.e., those listed in the header). -/

View File

@@ -10,13 +10,13 @@ import gdb
import gdb.printing
def is_scalar(o):
return o.cast(gdb.lookup_type('uintptr_t')) & 1 == 1
return o.cast(gdb.lookup_type('size_t')) & 1 == 1
def box(n):
return gdb.Value(n << 1 | 1).cast(gdb.lookup_type('lean_object').pointer())
def unbox(o):
return o.cast(gdb.lookup_type('uintptr_t')) >> 1
return o.cast(gdb.lookup_type('size_t')) >> 1
def to_cnstr(o):
return o.cast(gdb.lookup_type('lean_ctor_object').pointer())

View File

@@ -34,6 +34,8 @@ public def importModulesUsingCache
: IO Environment := do
if let some env := ( importEnvCache.get)[imports]? then
return env
unsafe enableInitializersExecution -- needed for `loadExts`
let env importModules (loadExts := true) imports opts trustLevel
importEnvCache.modify (·.insert imports env)
return env

View File

@@ -1,5 +1,7 @@
#include "util/options.h"
// Dear CI, please update stage 0!
namespace lean {
options get_default_options() {
options opts;

View File

@@ -7,6 +7,7 @@ instance : ToFormat InstanceEntry where
format e := format e.val
unsafe def tst1 : IO Unit := do
enableInitializersExecution
let env importModules (loadExts := true) #[{module := `Lean}] {}
let aux : MetaM Unit := do
let insts getGlobalInstancesIndex

View File

@@ -27,6 +27,7 @@ def mkCoreState (imports : Array Lean.Import) (loadExts : Bool := false) :
/-- Run a TacticM test with a fresh True goal. -/
def runTacticTest {α : Type} (test : Lean.Elab.Tactic.TacticM α) : IO α := do
unsafe Lean.enableInitializersExecution
let (coreCtx, coreState) mkCoreState #[{module := `Init}] (loadExts := true)
let (result, _) (do
let goal Lean.Meta.mkFreshExprMVar (Lean.mkConst ``True)