diff --git a/script/PROFILER_README.md b/script/PROFILER_README.md new file mode 100644 index 0000000000..dd4efc340c --- /dev/null +++ b/script/PROFILER_README.md @@ -0,0 +1,178 @@ +# Lean Profiler + +Profile Lean programs with demangled names using +[samply](https://github.com/mstange/samply) and +[Firefox Profiler](https://profiler.firefox.com). + +Python 3, no external dependencies. + +## Quick start + +```bash +# One command: record, symbolicate, demangle, and open in Firefox Profiler +script/lean_profile.sh ./my_lean_binary [args...] + +# See all options +script/lean_profile.sh --help +``` + +Requirements: `samply` (`cargo install samply`), `python3`. + +## Reading demangled names + +The demangler transforms low-level C symbol names into readable Lean names +and annotates them with compact modifiers. + +### Basic names + +| Raw symbol | Demangled | +|---|---| +| `l_Lean_Meta_Sym_main` | `Lean.Meta.Sym.main` | +| `lp_std_List_map` | `List.map (std)` | +| `_init_l_Foo_bar` | `[init] Foo.bar` | +| `initialize_Init_Data` | `[module_init] Init.Data` | +| `_lean_main` | `[lean] main` | + +### Modifier flags `[...]` + +Compiler-generated suffixes are folded into a bracket annotation after the +name. These indicate *how* the function was derived from the original source +definition. + +| Flag | Meaning | Compiler suffix | +|---|---|---| +| `arity`↓ | Reduced-arity specialization | `_redArg` | +| `boxed` | Boxed calling-convention wrapper | `_boxed` | +| `impl` | Implementation detail | `_impl` | +| λ | Lambda-lifted closure | `_lam_N`, `_lambda_N`, `_elam_N` | +| `jp` | Join point | `_jp_N` | +| `closed` | Extracted closed subterm | `_closed_N` | +| `private` | Private (module-scoped) definition | `_private.Module.0.` prefix | + +Examples: + +``` +Lean.Meta.Simp.simpLambda [boxed, λ] -- boxed wrapper of a lambda-lifted closure +Lean.Meta.foo [arity↓, private] -- reduced-arity version of a private def +``` + +Multiple flags are comma-separated. Order reflects how they were collected +(innermost suffix first). + +### Specializations `spec at ...` + +When the compiler specializes a function at a particular call site, the +demangled name shows `spec at ` after the base name and its flags. +The context names the function whose body triggered the specialization, and +may carry its own modifier flags: + +``` + [] spec at [] +``` + +Examples: + +``` +-- foo specialized at call site in bar +Lean.Meta.foo spec at Lean.Meta.bar + +-- foo (with a lambda closure) specialized at bar (with reduced arity and a lambda) +Lean.Meta.foo [λ] spec at Lean.Meta.bar[λ, arity↓] + +-- chained specialization: foo specialized at bar, then at baz +Lean.Meta.foo spec at Lean.Meta.bar spec at Lean.Meta.baz[arity↓] +``` + +Context flags use the same symbols as base flags. When a context has no +flags, the brackets are omitted. + +### Other annotations + +| Pattern | Meaning | +|---|---| +| `` | Lean runtime apply function (N arguments) | +| `.cold.N` suffix | LLVM cold-path clone (infrequently executed) | +| `(pkg)` suffix | Function from package `pkg` | + +## Tools + +### `script/lean_profile.sh` -- Full profiling pipeline + +Records a profile, symbolicates it via samply's API, demangles Lean names, +and opens the result in Firefox Profiler. This is the recommended workflow. + +```bash +script/lean_profile.sh ./build/release/stage1/bin/lean src/Lean/Elab/Term.lean +``` + +Environment variables: + +| Variable | Default | Description | +|---|---|---| +| `SAMPLY_RATE` | 1000 | Sampling rate in Hz | +| `SAMPLY_PORT` | 3756 | Port for samply symbolication server | +| `SERVE_PORT` | 3757 | Port for serving the demangled profile | +| `PROFILE_KEEP` | 0 | Set to 1 to keep the temp directory | + +### `script/profiler/lean_demangle.py` -- Name demangler + +Demangles individual symbol names. Works as a stdin filter (like `c++filt`) +or with arguments. + +```bash +echo "l_Lean_Meta_Sym_main" | python3 script/profiler/lean_demangle.py +# Lean.Meta.Sym.main + +python3 script/profiler/lean_demangle.py --raw l_foo___redArg +# foo._redArg (exact name, no postprocessing) +``` + +As a Python module: + +```python +from lean_demangle import demangle_lean_name, demangle_lean_name_raw + +demangle_lean_name("l_foo___redArg") # "foo [arity↓]" +demangle_lean_name_raw("l_foo___redArg") # "foo._redArg" +``` + +### `script/profiler/symbolicate_profile.py` -- Profile symbolicator + +Calls samply's symbolication API to resolve raw addresses into symbol names, +then demangles them. Used internally by `lean_profile.sh`. + +### `script/profiler/serve_profile.py` -- Profile server + +Serves a profile JSON file to Firefox Profiler without re-symbolication +(which would overwrite demangled names). Used internally by `lean_profile.sh`. + +### `script/profiler/lean_demangle_profile.py` -- Standalone profile rewriter + +Demangles names in an already-symbolicated profile file (if you have one +from another source). + +```bash +python3 script/profiler/lean_demangle_profile.py profile.json.gz -o demangled.json.gz +``` + +## Tests + +```bash +cd script/profiler && python3 -m unittest test_demangle -v +``` + +## How it works + +The demangler is a faithful port of Lean 4's `Name.demangleAux` from +`src/Lean/Compiler/NameMangling.lean`. It reverses the encoding used by +`Name.mangle` / `Name.mangleAux` which turns hierarchical Lean names into +valid C identifiers: + +- `_` separates name components (`Lean.Meta` -> `Lean_Meta`) +- `__` encodes a literal underscore in a component name +- `_xHH`, `_uHHHH`, `_UHHHHHHHH` encode special characters +- `_N_` encodes numeric name components +- `_00` is a disambiguation prefix for ambiguous patterns + +After demangling, a postprocessing pass folds compiler-generated suffixes +into human-readable annotations (see [Reading demangled names](#reading-demangled-names)). diff --git a/script/lean_profile.sh b/script/lean_profile.sh new file mode 100755 index 0000000000..f2a5501997 --- /dev/null +++ b/script/lean_profile.sh @@ -0,0 +1,133 @@ +#!/bin/bash +# Profile a Lean binary with demangled names. +# +# Usage: +# script/lean_profile.sh ./my_lean_binary [args...] +# +# Records a profile with samply, symbolicates via samply's API, +# demangles Lean symbol names, and opens the result in Firefox Profiler. +# +# Requirements: samply (cargo install samply), python3 +# +# Options (via environment variables): +# SAMPLY_RATE — sampling rate in Hz (default: 1000) +# SAMPLY_PORT — port for samply symbolication server (default: 3756) +# SERVE_PORT — port for serving the demangled profile (default: 3757) +# PROFILE_KEEP — set to 1 to keep the raw profile after demangling + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PROFILER_DIR="$SCRIPT_DIR/profiler" +SYMBOLICATE="$PROFILER_DIR/symbolicate_profile.py" +SERVE_PROFILE="$PROFILER_DIR/serve_profile.py" + +usage() { + cat >&2 < [args...] + +Profile a Lean binary and view the results in Firefox Profiler +with demangled Lean names. + +Requirements: + samply cargo install samply + python3 (included with macOS / most Linux distros) + +Environment variables: + SAMPLY_RATE sampling rate in Hz (default: 1000) + SAMPLY_PORT port for samply symbolication server (default: 3756) + SERVE_PORT port for serving the demangled profile (default: 3757) + PROFILE_KEEP set to 1 to keep the temp directory after profiling + +Reading demangled names: + Compiler suffixes are shown as modifier flags after the name: + [arity↓] reduced-arity specialization (_redArg) + [boxed] boxed calling-convention wrapper (_boxed) + [λ] lambda-lifted closure (_lam_N, _lambda_N, _elam_N) + [jp] join point (_jp_N) + [closed] extracted closed subterm (_closed_N) + [private] private (module-scoped) def (_private.Module.0. prefix) + [impl] implementation detail (_impl) + + Specializations appear after the flags: + Lean.Meta.foo [λ] spec at Lean.Meta.bar[λ, arity↓] + = foo (with lambda closure), specialized at bar (lambda, reduced arity) + + Multiple "spec at" entries indicate chained specializations. + See script/PROFILER_README.md for full documentation. +EOF + exit "${1:-0}" +} + +if [ $# -eq 0 ]; then + usage 1 +fi + +case "${1:-}" in + -h|--help) usage 0 ;; +esac + +if ! command -v samply &>/dev/null; then + echo "error: samply not found. Install with: cargo install samply" >&2 + exit 1 +fi + +RATE="${SAMPLY_RATE:-1000}" +PORT="${SAMPLY_PORT:-3756}" +SERVE="${SERVE_PORT:-3757}" +TMPDIR=$(mktemp -d /tmp/lean-profile-XXXXXX) +TMPFILE="$TMPDIR/profile.json.gz" +DEMANGLED="$TMPDIR/profile-demangled.json.gz" +SAMPLY_LOG="$TMPDIR/samply.log" +SAMPLY_PID="" + +cleanup() { + if [ -n "$SAMPLY_PID" ]; then + kill "$SAMPLY_PID" 2>/dev/null || true + wait "$SAMPLY_PID" 2>/dev/null || true + fi + # Safety net: kill anything still on the symbolication port + lsof -ti :"$PORT" 2>/dev/null | xargs kill 2>/dev/null || true + [ "${PROFILE_KEEP:-0}" = "1" ] || rm -rf "$TMPDIR" +} +trap cleanup EXIT + +# Step 1: Record +echo "Recording profile (rate=${RATE} Hz)..." >&2 +samply record --save-only -o "$TMPFILE" -r "$RATE" "$@" + +# Step 2: Start samply server for symbolication +echo "Starting symbolication server..." >&2 +samply load --no-open -P "$PORT" "$TMPFILE" > "$SAMPLY_LOG" 2>&1 & +SAMPLY_PID=$! + +# Wait for server to be ready +for i in $(seq 1 30); do + if grep -q "Local server listening" "$SAMPLY_LOG" 2>/dev/null; then + break + fi + sleep 0.2 +done + +# Extract the token from samply's output +TOKEN=$(grep -oE '[a-z0-9]{30,}' "$SAMPLY_LOG" | head -1) + +if [ -z "$TOKEN" ]; then + echo "error: could not get samply server token" >&2 + exit 1 +fi + +SERVER_URL="http://127.0.0.1:${PORT}/${TOKEN}" + +# Step 3: Symbolicate + demangle +echo "Symbolicating and demangling..." >&2 +python3 "$SYMBOLICATE" --server "$SERVER_URL" "$TMPFILE" -o "$DEMANGLED" + +# Step 4: Kill symbolication server +kill "$SAMPLY_PID" 2>/dev/null || true +wait "$SAMPLY_PID" 2>/dev/null || true +SAMPLY_PID="" + +# Step 5: Serve the demangled profile directly (without samply's re-symbolication) +echo "Opening in Firefox Profiler..." >&2 +python3 "$SERVE_PROFILE" "$DEMANGLED" -P "$SERVE" diff --git a/script/profiler/lean_demangle.py b/script/profiler/lean_demangle.py new file mode 100644 index 0000000000..a637a2f7b3 --- /dev/null +++ b/script/profiler/lean_demangle.py @@ -0,0 +1,779 @@ +#!/usr/bin/env python3 +""" +Lean name demangler. + +Demangles C symbol names produced by the Lean 4 compiler back into +readable Lean hierarchical names. + +Usage as a filter (like c++filt): + echo "l_Lean_Meta_Sym_main" | python lean_demangle.py + +Usage as a module: + from lean_demangle import demangle_lean_name + print(demangle_lean_name("l_Lean_Meta_Sym_main")) +""" + +import sys + + +# --------------------------------------------------------------------------- +# String.mangle / unmangle +# --------------------------------------------------------------------------- + +def _is_ascii_alnum(ch): + """Check if ch is an ASCII letter or digit (matching Lean's isAlpha/isDigit).""" + return ('a' <= ch <= 'z') or ('A' <= ch <= 'Z') or ('0' <= ch <= '9') + + +def mangle_string(s): + """Port of Lean's String.mangle: escape a single string for C identifiers.""" + result = [] + for ch in s: + if _is_ascii_alnum(ch): + result.append(ch) + elif ch == '_': + result.append('__') + else: + code = ord(ch) + if code < 0x100: + result.append('_x' + format(code, '02x')) + elif code < 0x10000: + result.append('_u' + format(code, '04x')) + else: + result.append('_U' + format(code, '08x')) + return ''.join(result) + + +def _parse_hex(s, pos, n): + """Parse n lowercase hex digits at pos. Returns (new_pos, value) or None.""" + if pos + n > len(s): + return None + val = 0 + for i in range(n): + c = s[pos + i] + if '0' <= c <= '9': + val = (val << 4) | (ord(c) - ord('0')) + elif 'a' <= c <= 'f': + val = (val << 4) | (ord(c) - ord('a') + 10) + else: + return None + return (pos + n, val) + + +# --------------------------------------------------------------------------- +# Name mangling (for round-trip verification) +# --------------------------------------------------------------------------- + +def _check_disambiguation(m): + """Port of Lean's checkDisambiguation: does mangled string m need a '00' prefix?""" + pos = 0 + while pos < len(m): + ch = m[pos] + if ch == '_': + pos += 1 + continue + if ch == 'x': + return _parse_hex(m, pos + 1, 2) is not None + if ch == 'u': + return _parse_hex(m, pos + 1, 4) is not None + if ch == 'U': + return _parse_hex(m, pos + 1, 8) is not None + if '0' <= ch <= '9': + return True + return False + # all underscores or empty + return True + + +def _need_disambiguation(prev_component, mangled_next): + """Port of Lean's needDisambiguation.""" + # Check if previous component (as a string) ends with '_' + prev_ends_underscore = (isinstance(prev_component, str) and + len(prev_component) > 0 and + prev_component[-1] == '_') + return prev_ends_underscore or _check_disambiguation(mangled_next) + + +def mangle_name(components, prefix="l_"): + """ + Mangle a list of name components (str or int) into a C symbol. + Port of Lean's Name.mangle. + """ + if not components: + return prefix + + parts = [] + prev = None + for i, comp in enumerate(components): + if isinstance(comp, int): + if i == 0: + parts.append(str(comp) + '_') + else: + parts.append('_' + str(comp) + '_') + else: + m = mangle_string(comp) + if i == 0: + if _check_disambiguation(m): + parts.append('00' + m) + else: + parts.append(m) + else: + if _need_disambiguation(prev, m): + parts.append('_00' + m) + else: + parts.append('_' + m) + prev = comp + + return prefix + ''.join(parts) + + +# --------------------------------------------------------------------------- +# Name demangling +# --------------------------------------------------------------------------- + +def demangle_body(s): + """ + Demangle a string produced by Name.mangleAux (without prefix). + Returns a list of components (str or int). + + This is a faithful port of Lean's Name.demangleAux from NameMangling.lean. + """ + components = [] + length = len(s) + + def emit(comp): + components.append(comp) + + def decode_num(pos, n): + """Parse remaining digits, emit numeric component, continue.""" + while pos < length: + ch = s[pos] + if '0' <= ch <= '9': + n = n * 10 + (ord(ch) - ord('0')) + pos += 1 + else: + # Expect '_' (trailing underscore of numeric encoding) + pos += 1 # skip '_' + emit(n) + if pos >= length: + return pos + # Skip separator '_' and go to name_start + pos += 1 + return name_start(pos) + # End of string + emit(n) + return pos + + def name_start(pos): + """Start parsing a new name component.""" + if pos >= length: + return pos + ch = s[pos] + pos += 1 + if '0' <= ch <= '9': + # Check for '00' disambiguation + if ch == '0' and pos < length and s[pos] == '0': + pos += 1 + return demangle_main(pos, "", 0) + else: + return decode_num(pos, ord(ch) - ord('0')) + elif ch == '_': + return demangle_main(pos, "", 1) + else: + return demangle_main(pos, ch, 0) + + def demangle_main(pos, acc, ucount): + """Main demangling loop.""" + while pos < length: + ch = s[pos] + pos += 1 + + if ch == '_': + ucount += 1 + continue + + if ucount % 2 == 0: + # Even underscores: literal underscores in component name + acc += '_' * (ucount // 2) + ch + ucount = 0 + continue + + # Odd ucount: separator or escape + if '0' <= ch <= '9': + # End current str component, start number + emit(acc + '_' * (ucount // 2)) + if ch == '0' and pos < length and s[pos] == '0': + pos += 1 + return demangle_main(pos, "", 0) + else: + return decode_num(pos, ord(ch) - ord('0')) + + # Try hex escapes + if ch == 'x': + result = _parse_hex(s, pos, 2) + if result is not None: + new_pos, val = result + acc += '_' * (ucount // 2) + chr(val) + pos = new_pos + ucount = 0 + continue + + if ch == 'u': + result = _parse_hex(s, pos, 4) + if result is not None: + new_pos, val = result + acc += '_' * (ucount // 2) + chr(val) + pos = new_pos + ucount = 0 + continue + + if ch == 'U': + result = _parse_hex(s, pos, 8) + if result is not None: + new_pos, val = result + acc += '_' * (ucount // 2) + chr(val) + pos = new_pos + ucount = 0 + continue + + # Name separator + emit(acc) + acc = '_' * (ucount // 2) + ch + ucount = 0 + + # End of string + acc += '_' * (ucount // 2) + if acc: + emit(acc) + return pos + + name_start(0) + return components + + +# --------------------------------------------------------------------------- +# Prefix handling for lp_ (package prefix) +# --------------------------------------------------------------------------- + +def _is_valid_string_mangle(s): + """Check if s is a valid output of String.mangle (no trailing bare _).""" + pos = 0 + length = len(s) + while pos < length: + ch = s[pos] + if _is_ascii_alnum(ch): + pos += 1 + elif ch == '_': + if pos + 1 >= length: + return False # trailing bare _ + nch = s[pos + 1] + if nch == '_': + pos += 2 + elif nch == 'x' and _parse_hex(s, pos + 2, 2) is not None: + pos = _parse_hex(s, pos + 2, 2)[0] + elif nch == 'u' and _parse_hex(s, pos + 2, 4) is not None: + pos = _parse_hex(s, pos + 2, 4)[0] + elif nch == 'U' and _parse_hex(s, pos + 2, 8) is not None: + pos = _parse_hex(s, pos + 2, 8)[0] + else: + return False + else: + return False + return True + + +def _skip_string_mangle(s, pos): + """ + Skip past a String.mangle output in s starting at pos. + Returns the position after the mangled string (where we expect the separator '_'). + This is a greedy scan. + """ + length = len(s) + while pos < length: + ch = s[pos] + if _is_ascii_alnum(ch): + pos += 1 + elif ch == '_': + if pos + 1 < length: + nch = s[pos + 1] + if nch == '_': + pos += 2 + elif nch == 'x' and _parse_hex(s, pos + 2, 2) is not None: + pos = _parse_hex(s, pos + 2, 2)[0] + elif nch == 'u' and _parse_hex(s, pos + 2, 4) is not None: + pos = _parse_hex(s, pos + 2, 4)[0] + elif nch == 'U' and _parse_hex(s, pos + 2, 8) is not None: + pos = _parse_hex(s, pos + 2, 8)[0] + else: + return pos # bare '_': separator + else: + return pos + else: + return pos + return pos + + +def _find_lp_body(s): + """ + Given s = everything after 'lp_' in a symbol, find where the declaration + body (Name.mangleAux output) starts. + Returns the start index of the body within s, or None. + + Strategy: try all candidate split points where the package part is a valid + String.mangle output and the body round-trips. Prefer the longest valid + package name (most specific match). + """ + length = len(s) + + # Collect candidate split positions: every '_' that could be the separator + candidates = [] + pos = 0 + while pos < length: + if s[pos] == '_': + candidates.append(pos) + pos += 1 + + # Try each candidate; collect all valid splits + valid_splits = [] + for split_pos in candidates: + pkg_part = s[:split_pos] + if not pkg_part: + continue + if not _is_valid_string_mangle(pkg_part): + continue + body = s[split_pos + 1:] + if not body: + continue + components = demangle_body(body) + if not components: + continue + remangled = mangle_name(components, prefix="") + if remangled == body: + first = components[0] + # Score: prefer first component starting with uppercase + has_upper = isinstance(first, str) and first and first[0].isupper() + valid_splits.append((split_pos, has_upper)) + + if valid_splits: + # Among splits where first decl component starts uppercase, pick longest pkg. + # Otherwise pick shortest pkg. + upper_splits = [s for s in valid_splits if s[1]] + if upper_splits: + best = max(upper_splits, key=lambda x: x[0]) + else: + best = min(valid_splits, key=lambda x: x[0]) + return best[0] + 1 + + # Fallback: greedy String.mangle scan + greedy_pos = _skip_string_mangle(s, 0) + if greedy_pos < length and s[greedy_pos] == '_': + return greedy_pos + 1 + + return None + + +# --------------------------------------------------------------------------- +# Format name components for display +# --------------------------------------------------------------------------- + +def format_name(components): + """Format a list of name components as a dot-separated string.""" + return '.'.join(str(c) for c in components) + + +# --------------------------------------------------------------------------- +# Human-friendly postprocessing +# --------------------------------------------------------------------------- + +# Compiler-generated suffix components — exact match +_SUFFIX_FLAGS_EXACT = { + '_redArg': 'arity\u2193', + '_boxed': 'boxed', + '_impl': 'impl', +} + +# Compiler-generated suffix prefixes — match with optional _N index +# e.g., _lam, _lam_0, _lam_3, _lambda_0, _closed_2 +_SUFFIX_FLAGS_PREFIX = { + '_lam': '\u03bb', + '_lambda': '\u03bb', + '_elam': '\u03bb', + '_jp': 'jp', + '_closed': 'closed', +} + + +def _match_suffix(component): + """ + Check if a string component is a compiler-generated suffix. + Returns the flag label or None. + + Handles both exact matches (_redArg, _boxed) and indexed suffixes + (_lam_0, _lambda_2, _closed_0) produced by appendIndexAfter. + """ + if not isinstance(component, str): + return None + if component in _SUFFIX_FLAGS_EXACT: + return _SUFFIX_FLAGS_EXACT[component] + if component in _SUFFIX_FLAGS_PREFIX: + return _SUFFIX_FLAGS_PREFIX[component] + # Check for indexed suffix: prefix + _N + for prefix, label in _SUFFIX_FLAGS_PREFIX.items(): + if component.startswith(prefix + '_'): + rest = component[len(prefix) + 1:] + if rest.isdigit(): + return label + return None + + +def _strip_private(components): + """Strip _private.Module.0. prefix. Returns (stripped_parts, is_private).""" + if (len(components) >= 3 and isinstance(components[0], str) and + components[0] == '_private'): + for i in range(1, len(components)): + if components[i] == 0: + if i + 1 < len(components): + return components[i + 1:], True + break + return components, False + + +def _strip_spec_suffixes(components): + """Strip trailing spec_N components (from appendIndexAfter).""" + parts = list(components) + while parts and isinstance(parts[-1], str) and parts[-1].startswith('spec_'): + rest = parts[-1][5:] + if rest.isdigit(): + parts.pop() + else: + break + return parts + + +def _is_spec_index(component): + """Check if a component is a spec_N index (from appendIndexAfter).""" + return (isinstance(component, str) and + component.startswith('spec_') and component[5:].isdigit()) + + +def _parse_spec_entries(rest): + """Parse _at_..._spec pairs into separate spec context entries. + + Given components starting from the first _at_, returns: + - entries: list of component lists, one per _at_..._spec block + - remaining: components after the last _spec N (trailing suffixes) + """ + entries = [] + current_ctx = None + remaining = [] + skip_next = False + + for p in rest: + if skip_next: + skip_next = False + continue + if isinstance(p, str) and p == '_at_': + if current_ctx is not None: + entries.append(current_ctx) + current_ctx = [] + continue + if isinstance(p, str) and p == '_spec': + if current_ctx is not None: + entries.append(current_ctx) + current_ctx = None + skip_next = True + continue + if isinstance(p, str) and p.startswith('_spec'): + if current_ctx is not None: + entries.append(current_ctx) + current_ctx = None + continue + if current_ctx is not None: + current_ctx.append(p) + else: + remaining.append(p) + + if current_ctx is not None: + entries.append(current_ctx) + + return entries, remaining + + +def _process_spec_context(components): + """Process a spec context into a clean name and its flags. + + Returns (name_parts, flags) where name_parts are the cleaned components + and flags is a deduplicated list of flag labels from compiler suffixes. + """ + parts = list(components) + parts, _ = _strip_private(parts) + + name_parts = [] + ctx_flags = [] + seen = set() + + for p in parts: + flag = _match_suffix(p) + if flag is not None: + if flag not in seen: + ctx_flags.append(flag) + seen.add(flag) + elif _is_spec_index(p): + pass + else: + name_parts.append(p) + + return name_parts, ctx_flags + + +def postprocess_name(components): + """ + Transform raw demangled components into a human-friendly display string. + + Applies: + - Private name cleanup: _private.Module.0.Name.foo -> Name.foo [private] + - Hygienic name cleanup: strips _@.module._hygCtx._hyg.N + - Suffix folding: _redArg, _boxed, _lam_0, etc. -> [flags] + - Specialization: f._at_.g._spec.N -> f spec at g + Shown after base [flags], with context flags: spec at g[ctx_flags] + """ + if not components: + return "" + + parts = list(components) + flags = [] + spec_entries = [] + + # --- Strip _private prefix --- + parts, is_private = _strip_private(parts) + + # --- Strip hygienic suffixes: everything from _@ onward --- + at_idx = None + for i, p in enumerate(parts): + if isinstance(p, str) and p.startswith('_@'): + at_idx = i + break + if at_idx is not None: + parts = parts[:at_idx] + + # --- Handle specialization: _at_ ... _spec N --- + at_positions = [i for i, p in enumerate(parts) + if isinstance(p, str) and p == '_at_'] + if at_positions: + first_at = at_positions[0] + base = parts[:first_at] + rest = parts[first_at:] + + entries, remaining = _parse_spec_entries(rest) + for ctx_components in entries: + ctx_name, ctx_flags = _process_spec_context(ctx_components) + if ctx_name or ctx_flags: + spec_entries.append((ctx_name, ctx_flags)) + + parts = base + remaining + + # --- Collect suffix flags from the end --- + while parts: + last = parts[-1] + flag = _match_suffix(last) + if flag is not None: + flags.append(flag) + parts.pop() + elif isinstance(last, int) and len(parts) >= 2: + prev_flag = _match_suffix(parts[-2]) + if prev_flag is not None: + flags.append(prev_flag) + parts.pop() # remove the number + parts.pop() # remove the suffix + else: + break + else: + break + + if is_private: + flags.append('private') + + # --- Format result --- + name = '.'.join(str(c) for c in parts) if parts else '?' + result = name + if flags: + flag_str = ', '.join(flags) + result += f' [{flag_str}]' + + for ctx_name, ctx_flags in spec_entries: + ctx_str = '.'.join(str(c) for c in ctx_name) if ctx_name else '?' + if ctx_flags: + ctx_flag_str = ', '.join(ctx_flags) + result += f' spec at {ctx_str}[{ctx_flag_str}]' + else: + result += f' spec at {ctx_str}' + + return result + + +# --------------------------------------------------------------------------- +# Main demangling entry point +# --------------------------------------------------------------------------- + +def demangle_lean_name_raw(mangled): + """ + Demangle a Lean C symbol, preserving all internal name components. + + Returns the exact demangled name with all compiler-generated suffixes + intact. Use demangle_lean_name() for human-friendly output. + """ + try: + return _demangle_lean_name_inner(mangled, human_friendly=False) + except Exception: + return mangled + + +def demangle_lean_name(mangled): + """ + Demangle a C symbol name produced by the Lean 4 compiler. + + Returns a human-friendly demangled name with compiler suffixes folded + into readable flags. Use demangle_lean_name_raw() to preserve all + internal components. + """ + try: + return _demangle_lean_name_inner(mangled, human_friendly=True) + except Exception: + return mangled + + +def _demangle_lean_name_inner(mangled, human_friendly=True): + """Inner demangle that may raise on malformed input.""" + + if mangled == "_lean_main": + return "[lean] main" + + # Handle lean_ runtime functions + if human_friendly and mangled.startswith("lean_apply_"): + rest = mangled[11:] + if rest.isdigit(): + return f"" + + # Strip .cold.N suffix (LLVM linker cold function clones) + cold_suffix = "" + core = mangled + dot_pos = core.find('.cold.') + if dot_pos >= 0: + cold_suffix = " " + core[dot_pos:] + core = core[:dot_pos] + elif core.endswith('.cold'): + cold_suffix = " .cold" + core = core[:-5] + + result = _demangle_core(core, human_friendly) + if result is None: + return mangled + return result + cold_suffix + + +def _demangle_core(mangled, human_friendly=True): + """Demangle a symbol without .cold suffix. Returns None if not a Lean name.""" + + fmt = postprocess_name if human_friendly else format_name + + # _init_ prefix + if mangled.startswith("_init_"): + rest = mangled[6:] + body, pkg_display = _strip_lean_prefix(rest) + if body is None: + return None + components = demangle_body(body) + if not components: + return None + name = fmt(components) + if pkg_display: + return f"[init] {name} ({pkg_display})" + return f"[init] {name}" + + # initialize_ prefix (module init functions) + if mangled.startswith("initialize_"): + rest = mangled[11:] + # With package: initialize_lp_{pkg}_{body} or initialize_l_{body} + body, pkg_display = _strip_lean_prefix(rest) + if body is not None: + components = demangle_body(body) + if components: + name = fmt(components) + if pkg_display: + return f"[module_init] {name} ({pkg_display})" + return f"[module_init] {name}" + # Without package: initialize_{Name.mangleAux(moduleName)} + if rest: + components = demangle_body(rest) + if components: + return f"[module_init] {fmt(components)}" + return None + + # l_ or lp_ prefix + body, pkg_display = _strip_lean_prefix(mangled) + if body is None: + return None + components = demangle_body(body) + if not components: + return None + name = fmt(components) + if pkg_display: + return f"{name} ({pkg_display})" + return name + + +def _strip_lean_prefix(s): + """ + Strip the l_ or lp_ prefix from a mangled symbol. + Returns (body, pkg_display) where body is the Name.mangleAux output + and pkg_display is None or a string describing the package. + Returns (None, None) if the string doesn't have a recognized prefix. + """ + if s.startswith("l_"): + return (s[2:], None) + + if s.startswith("lp_"): + after_lp = s[3:] + body_start = _find_lp_body(after_lp) + if body_start is not None: + pkg_mangled = after_lp[:body_start - 1] + # Unmangle the package name + pkg_components = demangle_body(pkg_mangled) + if pkg_components and len(pkg_components) == 1 and isinstance(pkg_components[0], str): + pkg_display = pkg_components[0] + else: + pkg_display = pkg_mangled + return (after_lp[body_start:], pkg_display) + # Fallback: treat everything after lp_ as body + return (after_lp, "?") + + return (None, None) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def main(): + """Filter stdin or arguments, demangling Lean names.""" + import argparse + parser = argparse.ArgumentParser( + description="Demangle Lean 4 C symbol names (like c++filt for Lean)") + parser.add_argument('names', nargs='*', + help='Names to demangle (reads stdin if none given)') + parser.add_argument('--raw', action='store_true', + help='Output exact demangled names without postprocessing') + args = parser.parse_args() + + demangle = demangle_lean_name_raw if args.raw else demangle_lean_name + + if args.names: + for name in args.names: + print(demangle(name)) + else: + for line in sys.stdin: + print(demangle(line.rstrip('\n'))) + + +if __name__ == '__main__': + main() diff --git a/script/profiler/lean_demangle_profile.py b/script/profiler/lean_demangle_profile.py new file mode 100644 index 0000000000..250dff4982 --- /dev/null +++ b/script/profiler/lean_demangle_profile.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +""" +Lean name demangler for samply / Firefox Profiler profiles. + +Reads a profile JSON (plain or gzipped), demangles Lean function names +in the string table, and writes the result back. + +Usage: + python lean_demangle_profile.py profile.json -o profile-demangled.json + python lean_demangle_profile.py profile.json.gz -o profile-demangled.json.gz +""" + +import argparse +import gzip +import json +import sys + +from lean_demangle import demangle_lean_name + + +def _demangle_string_array(string_array): + """Demangle Lean names in a string array in-place. Returns count.""" + count = 0 + for i, s in enumerate(string_array): + if not isinstance(s, str): + continue + demangled = demangle_lean_name(s) + if demangled != s: + string_array[i] = demangled + count += 1 + return count + + +def rewrite_profile(profile): + """ + Demangle Lean names in a Firefox Profiler profile dict (in-place). + + Handles two profile formats: + - Newer: shared.stringArray (single shared string table) + - Older/samply: per-thread stringArray (each thread has its own) + """ + count = 0 + + # Shared string table (newer Firefox Profiler format) + shared = profile.get("shared") + if shared is not None: + sa = shared.get("stringArray") + if sa is not None: + count += _demangle_string_array(sa) + + # Per-thread string tables (samply format) + for thread in profile.get("threads", []): + sa = thread.get("stringArray") + if sa is not None: + count += _demangle_string_array(sa) + + return count + + +def process_profile_file(input_path, output_path): + """Read a profile, demangle names, write it back.""" + is_gzip = input_path.endswith('.gz') + + if is_gzip: + with gzip.open(input_path, 'rt', encoding='utf-8') as f: + profile = json.load(f) + else: + with open(input_path, 'r', encoding='utf-8') as f: + profile = json.load(f) + + count = rewrite_profile(profile) + + out_gzip = output_path.endswith('.gz') if output_path else is_gzip + + if output_path: + if out_gzip: + with gzip.open(output_path, 'wt', encoding='utf-8') as f: + json.dump(profile, f, ensure_ascii=False) + else: + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(profile, f, ensure_ascii=False) + else: + json.dump(profile, sys.stdout, ensure_ascii=False) + sys.stdout.write('\n') + + return count + + +def main(): + parser = argparse.ArgumentParser( + description="Demangle Lean names in samply/Firefox Profiler profiles") + parser.add_argument('input', help='Input profile (JSON or .json.gz)') + parser.add_argument('-o', '--output', + help='Output path (default: stdout for JSON, ' + 'or input with -demangled suffix)') + args = parser.parse_args() + + output = args.output + if output is None and not sys.stdout.isatty(): + output = None # write to stdout + elif output is None: + # Generate output filename + inp = args.input + if inp.endswith('.json.gz'): + output = inp[:-8] + '-demangled.json.gz' + elif inp.endswith('.json'): + output = inp[:-5] + '-demangled.json' + else: + output = inp + '-demangled' + + count = process_profile_file(args.input, output) + if output: + print(f"Demangled {count} names, wrote {output}", file=sys.stderr) + + +if __name__ == '__main__': + main() diff --git a/script/profiler/serve_profile.py b/script/profiler/serve_profile.py new file mode 100644 index 0000000000..c7962e04bb --- /dev/null +++ b/script/profiler/serve_profile.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +""" +Serve a Firefox Profiler JSON file and open it in the browser. + +Unlike `samply load`, this does NOT provide a symbolication API, +so Firefox Profiler will use the names already in the profile as-is. +""" + +import argparse +import gzip +import http.server +import io +import sys +import threading +import webbrowser +import urllib.parse + + +class ProfileHandler(http.server.BaseHTTPRequestHandler): + """Serve the profile JSON and handle CORS for Firefox Profiler.""" + + profile_data = None # set by main() + + def do_GET(self): + if self.path == "/profile.json": + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Encoding", "gzip") + self.send_header("Access-Control-Allow-Origin", "*") + self.end_headers() + self.wfile.write(self.profile_data) + else: + self.send_response(404) + self.end_headers() + + def do_OPTIONS(self): + # CORS preflight + self.send_response(200) + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Access-Control-Allow-Methods", "GET") + self.send_header("Access-Control-Allow-Headers", "Content-Type") + self.end_headers() + + def log_message(self, format, *args): + pass # suppress request logs + + +def main(): + parser = argparse.ArgumentParser( + description="Serve a profile JSON for Firefox Profiler") + parser.add_argument("profile", help="Profile file (.json or .json.gz)") + parser.add_argument("-P", "--port", type=int, default=3457, + help="Port to serve on (default: 3457)") + parser.add_argument("-n", "--no-open", action="store_true", + help="Do not open the browser") + args = parser.parse_args() + + # Read the profile data (keep it gzipped for efficient serving) + if args.profile.endswith(".gz"): + with open(args.profile, "rb") as f: + ProfileHandler.profile_data = f.read() + else: + with open(args.profile, "rb") as f: + raw = f.read() + buf = io.BytesIO() + with gzip.GzipFile(fileobj=buf, mode="wb") as gz: + gz.write(raw) + ProfileHandler.profile_data = buf.getvalue() + + http.server.HTTPServer.allow_reuse_address = True + server = http.server.HTTPServer(("127.0.0.1", args.port), ProfileHandler) + profile_url = f"http://127.0.0.1:{args.port}/profile.json" + encoded = urllib.parse.quote(profile_url, safe="") + viewer_url = f"https://profiler.firefox.com/from-url/{encoded}" + + if not args.no_open: + # Open browser after a short delay to let server start + def open_browser(): + webbrowser.open(viewer_url) + threading.Timer(0.5, open_browser).start() + + print(f"Serving profile at {profile_url}", file=sys.stderr) + print(f"Firefox Profiler: {viewer_url}", file=sys.stderr) + print("Press Ctrl+C to stop.", file=sys.stderr) + + try: + server.serve_forever() + except KeyboardInterrupt: + print("\nStopped.", file=sys.stderr) + server.server_close() + + +if __name__ == "__main__": + main() diff --git a/script/profiler/symbolicate_profile.py b/script/profiler/symbolicate_profile.py new file mode 100644 index 0000000000..5cb8bedd6e --- /dev/null +++ b/script/profiler/symbolicate_profile.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +""" +Symbolicate a raw samply profile using samply's symbolication API, +then demangle Lean names. + +Usage: + python symbolicate_profile.py --server http://127.0.0.1:3000/TOKEN \ + raw-profile.json.gz -o symbolicated-demangled.json.gz +""" + +import argparse +import gzip +import json +import sys +import urllib.request + +from lean_demangle import demangle_lean_name + + +def symbolicate_and_demangle(profile, server_url): + """ + Symbolicate a raw samply profile via the symbolication API, + then demangle Lean names. Modifies the profile in-place. + Returns the number of names resolved. + """ + libs = profile.get("libs", []) + memory_map = [[lib["debugName"], lib["breakpadId"]] for lib in libs] + + count = 0 + for thread in profile.get("threads", []): + count += _process_thread(thread, libs, memory_map, server_url) + + return count + + +def _process_thread(thread, libs, memory_map, server_url): + """Symbolicate and demangle one thread. Returns count of resolved names.""" + sa = thread.get("stringArray") + ft = thread.get("frameTable") + func_t = thread.get("funcTable") + rt = thread.get("resourceTable") + + if not all([sa, ft, func_t, rt]): + return 0 + + # Build mapping: func_index -> (lib_index, address) + # A function may be referenced by multiple frames; pick any address. + func_info = {} # func_idx -> (lib_idx, address) + for i in range(ft.get("length", 0)): + addr = ft["address"][i] + func_idx = ft["func"][i] + if func_idx in func_info: + continue + res_idx = func_t["resource"][func_idx] + if res_idx < 0 or res_idx >= rt.get("length", 0): + continue + lib_idx = rt["lib"][res_idx] + if lib_idx < 0 or lib_idx >= len(libs): + continue + func_info[func_idx] = (lib_idx, addr) + + if not func_info: + return 0 + + # Batch symbolication: group by lib, send all addresses at once + frames_to_symbolicate = [] + func_order = [] # track which func each frame corresponds to + for func_idx, (lib_idx, addr) in func_info.items(): + frames_to_symbolicate.append([lib_idx, addr]) + func_order.append(func_idx) + + # Call the symbolication API + symbols = _call_symbolication_api( + server_url, memory_map, frames_to_symbolicate) + + if not symbols: + return 0 + + # Update stringArray with demangled names + count = 0 + for func_idx, symbol_name in zip(func_order, symbols): + if symbol_name is None: + continue + demangled = demangle_lean_name(symbol_name) + name_idx = func_t["name"][func_idx] + if name_idx < len(sa): + sa[name_idx] = demangled + count += 1 + + return count + + +def _call_symbolication_api(server_url, memory_map, frames): + """ + Call the Firefox Profiler symbolication API v5. + frames: list of [lib_index, address] + Returns: list of symbol names (or None for unresolved frames). + """ + url = server_url.rstrip("/") + "/symbolicate/v5" + + # Send all frames as one "stack" in one job + req_body = json.dumps({ + "memoryMap": memory_map, + "stacks": [frames], + }).encode() + + req = urllib.request.Request( + url, + data=req_body, + headers={"Content-Type": "application/json"}, + ) + + try: + with urllib.request.urlopen(req, timeout=60) as resp: + result = json.loads(resp.read()) + except Exception as e: + print(f"Symbolication API error: {e}", file=sys.stderr) + return None + + if "error" in result: + print(f"Symbolication API error: {result['error']}", file=sys.stderr) + return None + + # Extract symbol names from result + results = result.get("results", []) + if not results: + return None + + stacks = results[0].get("stacks", [[]]) + if not stacks: + return None + + symbols = [] + for frame_result in stacks[0]: + if isinstance(frame_result, dict): + symbols.append(frame_result.get("function")) + elif isinstance(frame_result, str): + symbols.append(frame_result) + else: + symbols.append(None) + + return symbols + + +def process_file(input_path, output_path, server_url): + """Read a raw profile, symbolicate + demangle, write it back.""" + is_gzip = input_path.endswith('.gz') + + if is_gzip: + with gzip.open(input_path, 'rt', encoding='utf-8') as f: + profile = json.load(f) + else: + with open(input_path, 'r', encoding='utf-8') as f: + profile = json.load(f) + + count = symbolicate_and_demangle(profile, server_url) + + out_gzip = output_path.endswith('.gz') if output_path else is_gzip + if output_path: + if out_gzip: + with gzip.open(output_path, 'wt', encoding='utf-8') as f: + json.dump(profile, f, ensure_ascii=False) + else: + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(profile, f, ensure_ascii=False) + else: + json.dump(profile, sys.stdout, ensure_ascii=False) + sys.stdout.write('\n') + + return count + + +def main(): + parser = argparse.ArgumentParser( + description="Symbolicate a raw samply profile and demangle Lean names") + parser.add_argument('input', help='Raw profile (JSON or .json.gz)') + parser.add_argument('-o', '--output', help='Output path') + parser.add_argument('--server', required=True, + help='Samply server URL (e.g., http://127.0.0.1:3000/TOKEN)') + args = parser.parse_args() + + output = args.output + if output is None: + inp = args.input + if inp.endswith('.json.gz'): + output = inp[:-8] + '-demangled.json.gz' + elif inp.endswith('.json'): + output = inp[:-5] + '-demangled.json' + else: + output = inp + '-demangled' + + count = process_file(args.input, output, args.server) + print(f"Symbolicated and demangled {count} names, wrote {output}", + file=sys.stderr) + + +if __name__ == '__main__': + main() diff --git a/script/profiler/test_demangle.py b/script/profiler/test_demangle.py new file mode 100644 index 0000000000..effffaf084 --- /dev/null +++ b/script/profiler/test_demangle.py @@ -0,0 +1,670 @@ +#!/usr/bin/env python3 +"""Tests for the Lean name demangler.""" + +import unittest +import json +import gzip +import tempfile +import os + +from lean_demangle import ( + mangle_string, mangle_name, demangle_body, format_name, + demangle_lean_name, demangle_lean_name_raw, postprocess_name, + _parse_hex, _check_disambiguation, +) + + +class TestStringMangle(unittest.TestCase): + """Test String.mangle (character-level escaping).""" + + def test_alphanumeric(self): + self.assertEqual(mangle_string("hello"), "hello") + self.assertEqual(mangle_string("abc123"), "abc123") + + def test_underscore(self): + self.assertEqual(mangle_string("a_b"), "a__b") + self.assertEqual(mangle_string("_"), "__") + self.assertEqual(mangle_string("__"), "____") + + def test_special_chars(self): + self.assertEqual(mangle_string("."), "_x2e") + self.assertEqual(mangle_string("a.b"), "a_x2eb") + + def test_unicode(self): + self.assertEqual(mangle_string("\u03bb"), "_u03bb") + self.assertEqual(mangle_string("\U0001d55c"), "_U0001d55c") + + def test_empty(self): + self.assertEqual(mangle_string(""), "") + + +class TestNameMangle(unittest.TestCase): + """Test Name.mangle (hierarchical name mangling).""" + + def test_simple(self): + self.assertEqual(mangle_name(["Lean", "Meta", "Sym", "main"]), + "l_Lean_Meta_Sym_main") + + def test_single_component(self): + self.assertEqual(mangle_name(["main"]), "l_main") + + def test_numeric_component(self): + self.assertEqual( + mangle_name(["_private", "Lean", "Meta", "Basic", 0, + "Lean", "Meta", "withMVarContextImp"]), + "l___private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp") + + def test_component_with_underscore(self): + self.assertEqual(mangle_name(["a_b"]), "l_a__b") + self.assertEqual(mangle_name(["a_b", "c"]), "l_a__b_c") + + def test_disambiguation_digit_start(self): + self.assertEqual(mangle_name(["0foo"]), "l_000foo") + + def test_disambiguation_escape_start(self): + self.assertEqual(mangle_name(["a", "x27"]), "l_a_00x27") + + def test_numeric_root(self): + self.assertEqual(mangle_name([42]), "l_42_") + self.assertEqual(mangle_name([42, "foo"]), "l_42__foo") + + def test_component_ending_with_underscore(self): + self.assertEqual(mangle_name(["a_", "b"]), "l_a___00b") + + def test_custom_prefix(self): + self.assertEqual(mangle_name(["foo"], prefix="lp_pkg_"), + "lp_pkg_foo") + + +class TestDemangleBody(unittest.TestCase): + """Test demangle_body (the core Name.demangleAux algorithm).""" + + def test_simple(self): + self.assertEqual(demangle_body("Lean_Meta_Sym_main"), + ["Lean", "Meta", "Sym", "main"]) + + def test_single(self): + self.assertEqual(demangle_body("main"), ["main"]) + + def test_empty(self): + self.assertEqual(demangle_body(""), []) + + def test_underscore_in_component(self): + self.assertEqual(demangle_body("a__b"), ["a_b"]) + self.assertEqual(demangle_body("a__b_c"), ["a_b", "c"]) + + def test_numeric_component(self): + self.assertEqual(demangle_body("foo_42__bar"), ["foo", 42, "bar"]) + + def test_numeric_root(self): + self.assertEqual(demangle_body("42_"), [42]) + + def test_numeric_at_end(self): + self.assertEqual(demangle_body("foo_42_"), ["foo", 42]) + + def test_disambiguation_00(self): + self.assertEqual(demangle_body("a_00x27"), ["a", "x27"]) + + def test_disambiguation_00_at_root(self): + self.assertEqual(demangle_body("000foo"), ["0foo"]) + + def test_hex_escape_x(self): + self.assertEqual(demangle_body("a_x2eb"), ["a.b"]) + + def test_hex_escape_u(self): + self.assertEqual(demangle_body("_u03bb"), ["\u03bb"]) + + def test_hex_escape_U(self): + self.assertEqual(demangle_body("_U0001d55c"), ["\U0001d55c"]) + + def test_private_name(self): + body = "__private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp" + self.assertEqual(demangle_body(body), + ["_private", "Lean", "Meta", "Basic", 0, + "Lean", "Meta", "withMVarContextImp"]) + + def test_boxed_suffix(self): + body = "foo___boxed" + self.assertEqual(demangle_body(body), ["foo", "_boxed"]) + + def test_redArg_suffix(self): + body = "foo_bar___redArg" + self.assertEqual(demangle_body(body), ["foo", "bar", "_redArg"]) + + def test_component_ending_underscore_disambiguation(self): + self.assertEqual(demangle_body("a___00b"), ["a_", "b"]) + + +class TestRoundTrip(unittest.TestCase): + """Test that mangle(demangle(x)) == x for various names.""" + + def _check_roundtrip(self, components): + mangled = mangle_name(components, prefix="") + demangled = demangle_body(mangled) + self.assertEqual(demangled, components, + f"Round-trip failed: {components} -> '{mangled}' -> {demangled}") + mangled_with_prefix = mangle_name(components, prefix="l_") + self.assertTrue(mangled_with_prefix.startswith("l_")) + body = mangled_with_prefix[2:] + demangled2 = demangle_body(body) + self.assertEqual(demangled2, components) + + def test_simple_names(self): + self._check_roundtrip(["Lean", "Meta", "main"]) + self._check_roundtrip(["a"]) + self._check_roundtrip(["Foo", "Bar", "baz"]) + + def test_numeric(self): + self._check_roundtrip(["foo", 0, "bar"]) + self._check_roundtrip([42]) + self._check_roundtrip(["a", 1, "b", 2, "c"]) + + def test_underscores(self): + self._check_roundtrip(["_private"]) + self._check_roundtrip(["a_b", "c_d"]) + self._check_roundtrip(["_at_", "_spec"]) + + def test_private_name(self): + self._check_roundtrip(["_private", "Lean", "Meta", "Basic", 0, + "Lean", "Meta", "withMVarContextImp"]) + + def test_boxed(self): + self._check_roundtrip(["Lean", "Meta", "foo", "_boxed"]) + + def test_redArg(self): + self._check_roundtrip(["Lean", "Meta", "foo", "_redArg"]) + + def test_specialization(self): + self._check_roundtrip(["List", "map", "_at_", "Foo", "bar", "_spec", 3]) + + def test_lambda(self): + self._check_roundtrip(["Foo", "bar", "_lambda", 0]) + self._check_roundtrip(["Foo", "bar", "_lambda", 2]) + + def test_closed(self): + self._check_roundtrip(["myConst", "_closed", 0]) + + def test_special_chars(self): + self._check_roundtrip(["a.b"]) + self._check_roundtrip(["\u03bb"]) + self._check_roundtrip(["a", "b\u2192c"]) + + def test_disambiguation_cases(self): + self._check_roundtrip(["a", "x27"]) + self._check_roundtrip(["0foo"]) + self._check_roundtrip(["a_", "b"]) + + def test_complex_real_names(self): + """Names modeled after real Lean compiler output.""" + self._check_roundtrip( + ["Lean", "MVarId", "withContext", "_at_", + "_private", "Lean", "Meta", "Sym", 0, + "Lean", "Meta", "Sym", "BackwardRule", "apply", + "_spec", 2, "_redArg", "_lambda", 0, "_boxed"]) + + +class TestDemangleRaw(unittest.TestCase): + """Test demangle_lean_name_raw (exact demangling, no postprocessing).""" + + def test_l_prefix(self): + self.assertEqual( + demangle_lean_name_raw("l_Lean_Meta_Sym_main"), + "Lean.Meta.Sym.main") + + def test_l_prefix_private(self): + result = demangle_lean_name_raw( + "l___private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp") + self.assertEqual(result, + "_private.Lean.Meta.Basic.0.Lean.Meta.withMVarContextImp") + + def test_l_prefix_boxed(self): + result = demangle_lean_name_raw("l_foo___boxed") + self.assertEqual(result, "foo._boxed") + + def test_l_prefix_redArg(self): + result = demangle_lean_name_raw( + "l___private_Lean_Meta_Basic_0__Lean_Meta_withMVarContextImp___redArg") + self.assertEqual( + result, + "_private.Lean.Meta.Basic.0.Lean.Meta.withMVarContextImp._redArg") + + def test_lean_main(self): + self.assertEqual(demangle_lean_name_raw("_lean_main"), "[lean] main") + + def test_non_lean_names(self): + self.assertEqual(demangle_lean_name_raw("printf"), "printf") + self.assertEqual(demangle_lean_name_raw("malloc"), "malloc") + self.assertEqual(demangle_lean_name_raw("lean_apply_5"), "lean_apply_5") + self.assertEqual(demangle_lean_name_raw(""), "") + + def test_init_prefix(self): + result = demangle_lean_name_raw("_init_l_Lean_Meta_foo") + self.assertEqual(result, "[init] Lean.Meta.foo") + + def test_lp_prefix_simple(self): + mangled = mangle_name(["Lean", "Meta", "foo"], prefix="lp_std_") + self.assertEqual(mangled, "lp_std_Lean_Meta_foo") + result = demangle_lean_name_raw(mangled) + self.assertEqual(result, "Lean.Meta.foo (std)") + + def test_lp_prefix_underscore_pkg(self): + pkg_mangled = mangle_string("my_pkg") + self.assertEqual(pkg_mangled, "my__pkg") + mangled = mangle_name(["Lean", "Meta", "foo"], + prefix=f"lp_{pkg_mangled}_") + self.assertEqual(mangled, "lp_my__pkg_Lean_Meta_foo") + result = demangle_lean_name_raw(mangled) + self.assertEqual(result, "Lean.Meta.foo (my_pkg)") + + def test_lp_prefix_private_decl(self): + mangled = mangle_name( + ["_private", "X", 0, "Y", "foo"], prefix="lp_pkg_") + self.assertEqual(mangled, "lp_pkg___private_X_0__Y_foo") + result = demangle_lean_name_raw(mangled) + self.assertEqual(result, "_private.X.0.Y.foo (pkg)") + + def test_complex_specialization(self): + components = [ + "Lean", "MVarId", "withContext", "_at_", + "_private", "Lean", "Meta", "Sym", 0, + "Lean", "Meta", "Sym", "BackwardRule", "apply", + "_spec", 2, "_redArg", "_lambda", 0, "_boxed" + ] + mangled = mangle_name(components) + result = demangle_lean_name_raw(mangled) + expected = format_name(components) + self.assertEqual(result, expected) + + def test_cold_suffix(self): + result = demangle_lean_name_raw("l_Lean_Meta_foo___redArg.cold.1") + self.assertEqual(result, "Lean.Meta.foo._redArg .cold.1") + + def test_cold_suffix_plain(self): + result = demangle_lean_name_raw("l_Lean_Meta_foo.cold") + self.assertEqual(result, "Lean.Meta.foo .cold") + + def test_initialize_no_pkg(self): + result = demangle_lean_name_raw("initialize_Init_Control_Basic") + self.assertEqual(result, "[module_init] Init.Control.Basic") + + def test_initialize_with_l_prefix(self): + result = demangle_lean_name_raw("initialize_l_Lean_Meta_foo") + self.assertEqual(result, "[module_init] Lean.Meta.foo") + + def test_never_crashes(self): + """Demangling should never raise, just return the original.""" + weird_inputs = [ + "", "l_", "lp_", "lp_x", "_init_", "initialize_", + "l_____", "lp____", "l_00", "l_0", + "some random string", "l_ space", + ] + for inp in weird_inputs: + result = demangle_lean_name_raw(inp) + self.assertIsInstance(result, str) + + +class TestPostprocess(unittest.TestCase): + """Test postprocess_name (human-friendly suffix folding, etc.).""" + + def test_no_change(self): + self.assertEqual(postprocess_name(["Lean", "Meta", "main"]), + "Lean.Meta.main") + + def test_boxed(self): + self.assertEqual(postprocess_name(["foo", "_boxed"]), + "foo [boxed]") + + def test_redArg(self): + self.assertEqual(postprocess_name(["foo", "bar", "_redArg"]), + "foo.bar [arity\u2193]") + + def test_lambda_separate(self): + # _lam as separate component + numeric index + self.assertEqual(postprocess_name(["foo", "_lam", 0]), + "foo [\u03bb]") + + def test_lambda_indexed(self): + # _lam_0 as single string (appendIndexAfter) + self.assertEqual(postprocess_name(["foo", "_lam_0"]), + "foo [\u03bb]") + self.assertEqual(postprocess_name(["foo", "_lambda_2"]), + "foo [\u03bb]") + + def test_lambda_boxed(self): + # _lam_0 followed by _boxed + self.assertEqual( + postprocess_name(["Lean", "Meta", "Simp", "simpLambda", + "_lam_0", "_boxed"]), + "Lean.Meta.Simp.simpLambda [boxed, \u03bb]") + + def test_closed(self): + self.assertEqual(postprocess_name(["myConst", "_closed", 3]), + "myConst [closed]") + + def test_closed_indexed(self): + self.assertEqual(postprocess_name(["myConst", "_closed_0"]), + "myConst [closed]") + + def test_multiple_suffixes(self): + self.assertEqual(postprocess_name(["foo", "_redArg", "_boxed"]), + "foo [boxed, arity\u2193]") + + def test_redArg_lam(self): + # _redArg followed by _lam_0 (issue #4) + self.assertEqual( + postprocess_name(["Lean", "profileitIOUnsafe", + "_redArg", "_lam_0"]), + "Lean.profileitIOUnsafe [\u03bb, arity\u2193]") + + def test_private_name(self): + self.assertEqual( + postprocess_name(["_private", "Lean", "Meta", "Basic", 0, + "Lean", "Meta", "withMVarContextImp"]), + "Lean.Meta.withMVarContextImp [private]") + + def test_private_with_suffix(self): + self.assertEqual( + postprocess_name(["_private", "Lean", "Meta", "Basic", 0, + "Lean", "Meta", "foo", "_redArg"]), + "Lean.Meta.foo [arity\u2193, private]") + + def test_hygienic_strip(self): + self.assertEqual( + postprocess_name(["Lean", "Meta", "foo", "_@", "Lean", "Meta", + "_hyg", 42]), + "Lean.Meta.foo") + + def test_specialization(self): + self.assertEqual( + postprocess_name(["List", "map", "_at_", "Foo", "bar", + "_spec", 3]), + "List.map spec at Foo.bar") + + def test_specialization_with_suffix(self): + # Base suffix _boxed appears in [flags] before spec at + self.assertEqual( + postprocess_name(["Lean", "MVarId", "withContext", "_at_", + "Foo", "bar", "_spec", 2, "_boxed"]), + "Lean.MVarId.withContext [boxed] spec at Foo.bar") + + def test_spec_context_with_flags(self): + # Compiler suffixes in spec context become context flags + self.assertEqual( + postprocess_name(["Lean", "Meta", "foo", "_at_", + "Lean", "Meta", "bar", "_elam_1", "_redArg", + "_spec", 2]), + "Lean.Meta.foo spec at Lean.Meta.bar[\u03bb, arity\u2193]") + + def test_spec_context_flags_dedup(self): + # Duplicate flag labels are deduplicated + self.assertEqual( + postprocess_name(["f", "_at_", + "g", "_lam_0", "_elam_1", "_redArg", + "_spec", 1]), + "f spec at g[\u03bb, arity\u2193]") + + def test_multiple_at(self): + # Multiple _at_ entries become separate spec at clauses + self.assertEqual( + postprocess_name(["f", "_at_", "g", "_spec", 1, + "_at_", "h", "_spec", 2]), + "f spec at g spec at h") + + def test_multiple_at_with_flags(self): + # Multiple spec at with flags on base and contexts + self.assertEqual( + postprocess_name(["f", "_at_", "g", "_redArg", "_spec", 1, + "_at_", "h", "_lam_0", "_spec", 2, + "_boxed"]), + "f [boxed] spec at g[arity\u2193] spec at h[\u03bb]") + + def test_base_flags_before_spec(self): + # Base trailing suffixes appear in [flags] before spec at + self.assertEqual( + postprocess_name(["f", "_at_", "g", "_spec", 1, "_lam_0"]), + "f [\u03bb] spec at g") + + def test_spec_context_strip_spec_suffixes(self): + # spec_0 in context should be stripped + self.assertEqual( + postprocess_name(["Lean", "Meta", "transformWithCache", "visit", + "_at_", + "_private", "Lean", "Meta", "Transform", 0, + "Lean", "Meta", "transform", + "Lean", "Meta", "Sym", "unfoldReducible", + "spec_0", "spec_0", + "_spec", 1]), + "Lean.Meta.transformWithCache.visit " + "spec at Lean.Meta.transform.Lean.Meta.Sym.unfoldReducible") + + def test_spec_context_strip_private(self): + # _private in spec context should be stripped + self.assertEqual( + postprocess_name(["Array", "mapMUnsafe", "map", "_at_", + "_private", "Lean", "Meta", "Transform", 0, + "Lean", "Meta", "transformWithCache", "visit", + "_spec", 1]), + "Array.mapMUnsafe.map " + "spec at Lean.Meta.transformWithCache.visit") + + def test_empty(self): + self.assertEqual(postprocess_name([]), "") + + +class TestDemangleHumanFriendly(unittest.TestCase): + """Test demangle_lean_name (human-friendly output).""" + + def test_simple(self): + self.assertEqual(demangle_lean_name("l_Lean_Meta_main"), + "Lean.Meta.main") + + def test_boxed(self): + self.assertEqual(demangle_lean_name("l_foo___boxed"), + "foo [boxed]") + + def test_redArg(self): + self.assertEqual(demangle_lean_name("l_foo___redArg"), + "foo [arity\u2193]") + + def test_private(self): + self.assertEqual( + demangle_lean_name( + "l___private_Lean_Meta_Basic_0__Lean_Meta_foo"), + "Lean.Meta.foo [private]") + + def test_private_with_redArg(self): + self.assertEqual( + demangle_lean_name( + "l___private_Lean_Meta_Basic_0__Lean_Meta_foo___redArg"), + "Lean.Meta.foo [arity\u2193, private]") + + def test_cold_with_suffix(self): + self.assertEqual( + demangle_lean_name("l_Lean_Meta_foo___redArg.cold.1"), + "Lean.Meta.foo [arity\u2193] .cold.1") + + def test_lean_apply(self): + self.assertEqual(demangle_lean_name("lean_apply_5"), "") + self.assertEqual(demangle_lean_name("lean_apply_12"), "") + + def test_lean_apply_raw_unchanged(self): + self.assertEqual(demangle_lean_name_raw("lean_apply_5"), + "lean_apply_5") + + def test_init_private(self): + self.assertEqual( + demangle_lean_name( + "_init_l___private_X_0__Y_foo"), + "[init] Y.foo [private]") + + def test_complex_specialization(self): + components = [ + "Lean", "MVarId", "withContext", "_at_", + "_private", "Lean", "Meta", "Sym", 0, + "Lean", "Meta", "Sym", "BackwardRule", "apply", + "_spec", 2, "_redArg", "_lambda", 0, "_boxed" + ] + mangled = mangle_name(components) + result = demangle_lean_name(mangled) + # Base: Lean.MVarId.withContext with trailing _redArg, _lambda 0, _boxed + # Spec context: Lean.Meta.Sym.BackwardRule.apply (private stripped) + self.assertEqual( + result, + "Lean.MVarId.withContext [boxed, \u03bb, arity\u2193] " + "spec at Lean.Meta.Sym.BackwardRule.apply") + + def test_non_lean_unchanged(self): + self.assertEqual(demangle_lean_name("printf"), "printf") + self.assertEqual(demangle_lean_name("malloc"), "malloc") + self.assertEqual(demangle_lean_name(""), "") + + +class TestDemangleProfile(unittest.TestCase): + """Test the profile rewriter.""" + + def _make_profile_shared(self, strings): + """Create a profile with shared.stringArray (newer format).""" + return { + "meta": {"version": 28}, + "libs": [], + "shared": { + "stringArray": list(strings), + }, + "threads": [{ + "name": "main", + "pid": "1", + "tid": 1, + "funcTable": { + "name": list(range(len(strings))), + "isJS": [False] * len(strings), + "relevantForJS": [False] * len(strings), + "resource": [-1] * len(strings), + "fileName": [None] * len(strings), + "lineNumber": [None] * len(strings), + "columnNumber": [None] * len(strings), + "length": len(strings), + }, + "frameTable": {"length": 0}, + "stackTable": {"length": 0}, + "samples": {"length": 0}, + "markers": {"length": 0}, + "resourceTable": {"length": 0}, + "nativeSymbols": {"length": 0}, + }], + "pages": [], + "counters": [], + } + + def _make_profile_per_thread(self, strings): + """Create a profile with per-thread stringArray (samply format).""" + return { + "meta": {"version": 28}, + "libs": [], + "threads": [{ + "name": "main", + "pid": "1", + "tid": 1, + "stringArray": list(strings), + "funcTable": { + "name": list(range(len(strings))), + "isJS": [False] * len(strings), + "relevantForJS": [False] * len(strings), + "resource": [-1] * len(strings), + "fileName": [None] * len(strings), + "lineNumber": [None] * len(strings), + "columnNumber": [None] * len(strings), + "length": len(strings), + }, + "frameTable": {"length": 0}, + "stackTable": {"length": 0}, + "samples": {"length": 0}, + "markers": {"length": 0}, + "resourceTable": {"length": 0}, + "nativeSymbols": {"length": 0}, + }], + "pages": [], + "counters": [], + } + + def test_profile_rewrite_shared(self): + from lean_demangle_profile import rewrite_profile + strings = [ + "l_Lean_Meta_Sym_main", + "printf", + "lean_apply_5", + "l___private_Lean_Meta_Basic_0__Lean_Meta_foo", + ] + profile = self._make_profile_shared(strings) + rewrite_profile(profile) + sa = profile["shared"]["stringArray"] + self.assertEqual(sa[0], "Lean.Meta.Sym.main") + self.assertEqual(sa[1], "printf") + self.assertEqual(sa[2], "") + self.assertEqual(sa[3], "Lean.Meta.foo [private]") + + def test_profile_rewrite_per_thread(self): + from lean_demangle_profile import rewrite_profile + strings = [ + "l_Lean_Meta_Sym_main", + "printf", + "lean_apply_5", + "l___private_Lean_Meta_Basic_0__Lean_Meta_foo", + ] + profile = self._make_profile_per_thread(strings) + count = rewrite_profile(profile) + sa = profile["threads"][0]["stringArray"] + self.assertEqual(sa[0], "Lean.Meta.Sym.main") + self.assertEqual(sa[1], "printf") + self.assertEqual(sa[2], "") + self.assertEqual(sa[3], "Lean.Meta.foo [private]") + self.assertEqual(count, 3) + + def test_profile_json_roundtrip(self): + from lean_demangle_profile import process_profile_file + strings = ["l_Lean_Meta_main", "malloc"] + profile = self._make_profile_shared(strings) + + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', + delete=False) as f: + json.dump(profile, f) + inpath = f.name + + outpath = inpath.replace('.json', '-demangled.json') + try: + process_profile_file(inpath, outpath) + with open(outpath) as f: + result = json.load(f) + self.assertEqual(result["shared"]["stringArray"][0], + "Lean.Meta.main") + self.assertEqual(result["shared"]["stringArray"][1], "malloc") + finally: + os.unlink(inpath) + if os.path.exists(outpath): + os.unlink(outpath) + + def test_profile_gzip_roundtrip(self): + from lean_demangle_profile import process_profile_file + strings = ["l_Lean_Meta_main", "malloc"] + profile = self._make_profile_shared(strings) + + with tempfile.NamedTemporaryFile(suffix='.json.gz', + delete=False) as f: + with gzip.open(f, 'wt') as gz: + json.dump(profile, gz) + inpath = f.name + + outpath = inpath.replace('.json.gz', '-demangled.json.gz') + try: + process_profile_file(inpath, outpath) + with gzip.open(outpath, 'rt') as f: + result = json.load(f) + self.assertEqual(result["shared"]["stringArray"][0], + "Lean.Meta.main") + finally: + os.unlink(inpath) + if os.path.exists(outpath): + os.unlink(outpath) + + +if __name__ == '__main__': + unittest.main()