feat: share check_system interval timer between C++ and Lean

Factor the interval monitoring into a standalone `check_system_interval` function exposed to Lean via `@[extern "lean_check_system_interval"]`. Call it from both the C++ `check_system` and the Lean-level `Core.checkInterrupted`, so that CPU time spent in Lean elaboration (e.g. simp) is properly tracked and attributed. Previously the timer only fired from C++ kernel code, so gaps caused by Lean-level elaboration were misleadingly attributed to the kernel component that happened to call `check_system` next. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
feat: add backtrace to check_system interval warnings
2026-04-01 01:34:06 +00:00 · 2026-03-31 16:28:36 +00:00 · 2026-03-31 16:17:27 +00:00 · 2026-03-31 13:51:13 +00:00
2 changed files with 68 additions and 0 deletions
--- a/src/Lean/CoreM.lean
+++ b/src/Lean/CoreM.lean
@@ -448,6 +448,14 @@ Note that the value of `ctx.initHeartbeats` is ignored and replaced with `IO.get
 protected def withIncRecDepth [Monad m] [MonadControlT CoreM m] (x : m α) : m α :=
  controlAt CoreM fun runInBase => withIncRecDepth (runInBase x)

+/--
+Record a check-in for the `LEAN_CHECK_SYSTEM_INTERVAL_MS` monitoring.
+When that env var is set, warns on stderr if too much CPU time has elapsed since
+the last check-in from either the C++ `check_system` or this function.
+-/
+@[extern "lean_check_system_interval"]
+opaque checkSystemInterval (componentName : @& String) : BaseIO Unit
+
 /--
 Throws an internal interrupt exception if cancellation has been requested. The exception is not
 caught by `try catch` but is intended to be caught by `Command.withLoggingExceptions` at the top
@@ -455,6 +463,7 @@ level of elaboration. In particular, we want to skip producing further increment
 the exception has been thrown.
 -/
@[inline] def checkInterrupted : CoreM Unit := do
+  checkSystemInterval "Lean elaborator"
  if let some tk := (← read).cancelTk? then
    if (← tk.isSet) then
      throwInterruptException
--- a/src/runtime/interrupt.cpp
+++ b/src/runtime/interrupt.cpp
@@ -5,6 +5,9 @@ Released under Apache 2.0 license as described in the file LICENSE.
 Author: Leonardo de Moura
 */
 #include <limits>
+#include <cstdlib>
+#include <ctime>
+#include <execinfo.h>
 #include "runtime/thread.h"
 #include "runtime/interrupt.h"
 #include "runtime/exception.h"
@@ -16,6 +19,61 @@ namespace lean {
 LEAN_THREAD_VALUE(size_t, g_max_heartbeat, 0);
 LEAN_THREAD_VALUE(size_t, g_heartbeat, 0);

+// --- check_system interval monitoring ---
+// When LEAN_CHECK_SYSTEM_INTERVAL_MS is set, warn on stderr if check_system
+// is not called within the given interval (in milliseconds of CPU time) on the
+// current thread. Uses CLOCK_THREAD_CPUTIME_ID so that IO waits (sleep, network,
+// disk) do not count towards the interval.
+
+// 0 = disabled
+static unsigned g_check_system_interval_ms = 0;
+static bool g_check_system_interval_initialized = false;
+
+static unsigned get_check_system_interval_ms() {
+    if (!g_check_system_interval_initialized) {
+        g_check_system_interval_initialized = true;
+        if (const char * env = std::getenv("LEAN_CHECK_SYSTEM_INTERVAL_MS")) {
+            g_check_system_interval_ms = std::atoi(env);
+        }
+    }
+    return g_check_system_interval_ms;
+}
+
+static uint64_t thread_cpu_time_ns() {
+    struct timespec ts;
+    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
+    return static_cast<uint64_t>(ts.tv_sec) * 1000000000ULL + static_cast<uint64_t>(ts.tv_nsec);
+}
+
+// Thread-local: last CPU time when check_system was called on this thread.
+LEAN_THREAD_VALUE(uint64_t, g_last_check_system_ns, 0);
+
+static void check_system_interval(char const * component_name) {
+    unsigned interval_ms = get_check_system_interval_ms();
+    if (interval_ms > 0) {
+        uint64_t now_ns = thread_cpu_time_ns();
+        uint64_t last_ns = g_last_check_system_ns;
+        g_last_check_system_ns = now_ns;
+        if (last_ns != 0) {
+            uint64_t elapsed_ms = (now_ns - last_ns) / 1000000;
+            if (elapsed_ms > interval_ms) {
+                fprintf(stderr,
+                    "[check_system] WARNING: %llu ms CPU time since last check_system call "
+                    "(component: %s)\n",
+                    (unsigned long long)elapsed_ms, component_name);
+                void * bt_buf[64];
+                int nptrs = backtrace(bt_buf, 64);
+                backtrace_symbols_fd(bt_buf, nptrs, 2); // fd 2 = stderr
+            }
+        }
+    }
+}
+
+extern "C" LEAN_EXPORT obj_res lean_check_system_interval(b_lean_obj_arg component_name) {
+    check_system_interval(lean_string_cstr(component_name));
+    return lean_io_result_mk_ok(lean_box(0));
+}
+
 extern "C" LEAN_EXPORT obj_res lean_internal_get_default_max_heartbeat() {
 #ifdef LEAN_DEFAULT_MAX_HEARTBEAT
    return lean_box(LEAN_DEFAULT_MAX_HEARTBEAT);
@@ -71,6 +129,7 @@ void check_interrupted() {
 }

 void check_system(char const * component_name, bool do_check_interrupted) {
+    check_system_interval(component_name);
    check_stack(component_name);
    check_memory(component_name);
    if (do_check_interrupted) {