Compare commits

...

3 Commits

Author SHA1 Message Date
Joachim Breitner
7488a1f712 feat: share check_system interval timer between C++ and Lean
Factor the interval monitoring into a standalone `check_system_interval`
function exposed to Lean via `@[extern "lean_check_system_interval"]`.
Call it from both the C++ `check_system` and the Lean-level
`Core.checkInterrupted`, so that CPU time spent in Lean elaboration
(e.g. simp) is properly tracked and attributed.

Previously the timer only fired from C++ kernel code, so gaps caused by
Lean-level elaboration were misleadingly attributed to the kernel
component that happened to call `check_system` next.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-31 16:28:36 +00:00
Joachim Breitner
ffd0f7de85 feat: add backtrace to check_system interval warnings
This adds a stack trace dump when the check_system interval warning
fires, making it easier to identify the code path responsible for
the gap.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-31 16:17:27 +00:00
Joachim Breitner
4f7de4a1d9 feat: add optional check_system interval monitoring
This adds optional monitoring of how frequently `check_system` is called
per thread. When `LEAN_CHECK_SYSTEM_INTERVAL_MS` is set, a warning is
printed to stderr if the interval between consecutive calls exceeds the
threshold. This helps identify code paths where cancellation checks are
too infrequent for responsive cancellation.

Disabled by default; set e.g. `LEAN_CHECK_SYSTEM_INTERVAL_MS=1000` to
enable with a 1-second threshold.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-31 13:51:13 +00:00
2 changed files with 68 additions and 0 deletions

View File

@@ -448,6 +448,14 @@ Note that the value of `ctx.initHeartbeats` is ignored and replaced with `IO.get
protected def withIncRecDepth [Monad m] [MonadControlT CoreM m] (x : m α) : m α :=
controlAt CoreM fun runInBase => withIncRecDepth (runInBase x)
/--
Record a check-in for the `LEAN_CHECK_SYSTEM_INTERVAL_MS` monitoring.
When that env var is set, warns on stderr if too much CPU time has elapsed since
the last check-in from either the C++ `check_system` or this function.
-/
@[extern "lean_check_system_interval"]
opaque checkSystemInterval (componentName : @& String) : BaseIO Unit
/--
Throws an internal interrupt exception if cancellation has been requested. The exception is not
caught by `try catch` but is intended to be caught by `Command.withLoggingExceptions` at the top
@@ -455,6 +463,7 @@ level of elaboration. In particular, we want to skip producing further increment
the exception has been thrown.
-/
@[inline] def checkInterrupted : CoreM Unit := do
checkSystemInterval "Lean elaborator"
if let some tk := ( read).cancelTk? then
if ( tk.isSet) then
throwInterruptException

View File

@@ -5,6 +5,9 @@ Released under Apache 2.0 license as described in the file LICENSE.
Author: Leonardo de Moura
*/
#include <limits>
#include <cstdlib>
#include <ctime>
#include <execinfo.h>
#include "runtime/thread.h"
#include "runtime/interrupt.h"
#include "runtime/exception.h"
@@ -16,6 +19,61 @@ namespace lean {
LEAN_THREAD_VALUE(size_t, g_max_heartbeat, 0);
LEAN_THREAD_VALUE(size_t, g_heartbeat, 0);
// --- check_system interval monitoring ---
// When LEAN_CHECK_SYSTEM_INTERVAL_MS is set, warn on stderr if check_system
// is not called within the given interval (in milliseconds of CPU time) on the
// current thread. Uses CLOCK_THREAD_CPUTIME_ID so that IO waits (sleep, network,
// disk) do not count towards the interval.
// 0 = disabled
static unsigned g_check_system_interval_ms = 0;
static bool g_check_system_interval_initialized = false;
static unsigned get_check_system_interval_ms() {
if (!g_check_system_interval_initialized) {
g_check_system_interval_initialized = true;
if (const char * env = std::getenv("LEAN_CHECK_SYSTEM_INTERVAL_MS")) {
g_check_system_interval_ms = std::atoi(env);
}
}
return g_check_system_interval_ms;
}
static uint64_t thread_cpu_time_ns() {
struct timespec ts;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
return static_cast<uint64_t>(ts.tv_sec) * 1000000000ULL + static_cast<uint64_t>(ts.tv_nsec);
}
// Thread-local: last CPU time when check_system was called on this thread.
LEAN_THREAD_VALUE(uint64_t, g_last_check_system_ns, 0);
static void check_system_interval(char const * component_name) {
unsigned interval_ms = get_check_system_interval_ms();
if (interval_ms > 0) {
uint64_t now_ns = thread_cpu_time_ns();
uint64_t last_ns = g_last_check_system_ns;
g_last_check_system_ns = now_ns;
if (last_ns != 0) {
uint64_t elapsed_ms = (now_ns - last_ns) / 1000000;
if (elapsed_ms > interval_ms) {
fprintf(stderr,
"[check_system] WARNING: %llu ms CPU time since last check_system call "
"(component: %s)\n",
(unsigned long long)elapsed_ms, component_name);
void * bt_buf[64];
int nptrs = backtrace(bt_buf, 64);
backtrace_symbols_fd(bt_buf, nptrs, 2); // fd 2 = stderr
}
}
}
}
extern "C" LEAN_EXPORT obj_res lean_check_system_interval(b_lean_obj_arg component_name) {
check_system_interval(lean_string_cstr(component_name));
return lean_io_result_mk_ok(lean_box(0));
}
extern "C" LEAN_EXPORT obj_res lean_internal_get_default_max_heartbeat() {
#ifdef LEAN_DEFAULT_MAX_HEARTBEAT
return lean_box(LEAN_DEFAULT_MAX_HEARTBEAT);
@@ -71,6 +129,7 @@ void check_interrupted() {
}
void check_system(char const * component_name, bool do_check_interrupted) {
check_system_interval(component_name);
check_stack(component_name);
check_memory(component_name);
if (do_check_interrupted) {