Files
lean4/script/lean-bisect
Kim Morrison d7e57b66d5 feat: support revised nightly releases (nightly-YYYY-MM-DD-revK) (#12461)
This PR adds support for manually re-releasing nightlies when a build
issue or critical fix requires it. When a `workflow_dispatch` triggers
the nightly release job and a `nightly-YYYY-MM-DD` tag already exists,
the CI now creates `nightly-YYYY-MM-DD-rev1` (then `-rev2`, etc.)
instead of silently skipping.

### Lake `ToolchainVer`

- Extend `ToolchainVer.nightly` with an optional `rev : Option Nat`
field
- Parse `-revK` suffixes from nightly tags in `ofString`
- Ordering: `nightly-YYYY-MM-DD` < `nightly-YYYY-MM-DD-rev1` < `-rev2` <
`nightly-YYYY-MM-DD+1`
- Round-trip: `toString (ofString s) == s` for both variants

### CI workflow

- "Set Nightly" step probes existing tags on `workflow_dispatch` to find
next available `-revK`
- Scheduled nightlies retain existing behavior (skip if commit already
tagged)
- Changelog grep updated from `nightly-[-0-9]*` to `nightly-[^ ,)]*` to
match `-revK` suffixes

### `lean-bisect`

- Updated `NIGHTLY_PATTERN` regex, sort key, error messages, and help
text

### Companion PRs

- https://github.com/leanprover-community/mathlib4/pull/35220: update
`nightly_bump_and_merge.yml` tag grep and `nightly_detect_failure.yml`
warning message
- https://github.com/leanprover-community/leanprover-community.github.io/pull/787: update `tags_and_branches.md` documentation

🤖 Prepared with Claude Code

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 00:41:04 +00:00

1297 lines
46 KiB
Python
Executable File

#!/usr/bin/env python3
"""
lean-bisect: Bisect Lean toolchain versions to find where behavior changes.
Usage:
lean-bisect path/to/file.lean # auto-find regression
lean-bisect path/to/file.lean ..nightly-2024-06-01 # bisect to a nightly
lean-bisect path/to/file.lean nightly-2024-01-01..nightly-2024-06-01
lean-bisect path/to/file.lean nightly-2024-01-01..nightly-2024-06-01 --timeout 30
lean-bisect path/to/file.lean abc1234..def5678 # bisect commits
lean-bisect --selftest
lean-bisect --clear-cache
For SHA-based bisection, the script will first try to download pre-built CI artifacts
from GitHub Actions (fast: ~30s) before falling back to building from source (slow: 2-5min).
CI artifacts are cached in ~/.cache/lean_build_artifact/ for reuse.
"""
import argparse
import json
import os
import platform
import re
import shutil
import subprocess
import sys
import tempfile
import urllib.request
import urllib.error
from pathlib import Path
from dataclasses import dataclass
from typing import Optional, Tuple, List
# Import shared artifact download functionality
sys.path.insert(0, str(Path(__file__).parent))
import build_artifact
# Constants
NIGHTLY_PATTERN = re.compile(r'^nightly-(\d{4})-(\d{2})-(\d{2})(-rev(\d+))?$')
VERSION_PATTERN = re.compile(r'^v4\.(\d+)\.(\d+)(-rc\d+)?$')
# Accept short SHAs (7+ chars) - we'll resolve to full SHA later
SHA_PATTERN = re.compile(r'^[0-9a-f]{7,40}$')
GITHUB_API_BASE = "https://api.github.com"
NIGHTLY_REPO = "leanprover/lean4-nightly"
LEAN4_REPO = "leanprover/lean4"
# Re-export from build_artifact for local use
ARTIFACT_CACHE = build_artifact.ARTIFACT_CACHE
CI_FAILED = build_artifact.CI_FAILED
# ANSI colors for terminal output
class Colors:
    """ANSI escape sequences used by color(); RESET restores default styling."""
    RED = '\033[91m'      # errors / FAIL
    GREEN = '\033[92m'    # success / OK
    YELLOW = '\033[93m'   # warnings / TIMEOUT
    BLUE = '\033[94m'     # informational messages
    BOLD = '\033[1m'      # headings
    RESET = '\033[0m'     # clear all attributes
def color(text: str, c: str) -> str:
    """Wrap *text* in the ANSI code *c* when stdout is an interactive terminal.

    When stdout is redirected (pipe, file, CI log), *text* is returned
    unchanged so logs stay free of escape sequences.
    """
    if not sys.stdout.isatty():
        return text
    return f"{c}{text}{Colors.RESET}"
@dataclass
class BuildResult:
    """Outcome of running `lake env lean` on the test file."""
    exit_code: int
    stdout: str
    stderr: str
    timed_out: bool

    def signature(self, ignore_messages: bool) -> Tuple:
        """Return a comparable signature for this result.

        A timeout is folded into the sentinel exit code -124 so it compares
        as its own distinct state. When *ignore_messages* is set, only the
        (possibly substituted) exit code participates in comparisons;
        otherwise the captured stdout/stderr are included as well.
        """
        code = -124 if self.timed_out else self.exit_code
        if ignore_messages:
            return (code,)
        return (code, self.stdout, self.stderr)
def error(msg: str) -> None:
    """Print a red "Error:" message to stderr, then exit with status 1."""
    sys.stderr.write(color(f"Error: {msg}", Colors.RED) + "\n")
    sys.exit(1)
def warn(msg: str) -> None:
    """Print a yellow "Warning:" message to stderr; execution continues."""
    sys.stderr.write(color(f"Warning: {msg}", Colors.YELLOW) + "\n")
def info(msg: str) -> None:
    """Print a blue informational message to stdout."""
    sys.stdout.write(color(msg, Colors.BLUE) + "\n")
def success(msg: str) -> None:
    """Print a green success message to stdout."""
    sys.stdout.write(color(msg, Colors.GREEN) + "\n")
# -----------------------------------------------------------------------------
# Import sanity check
# -----------------------------------------------------------------------------
def check_imports(file_path: Path) -> None:
    """Verify the Lean file imports only from the Lean.* or Std.* namespaces.

    Exits via error() on the first offending import, because bisection runs
    against bare toolchains where no lake dependencies are available.
    """
    allowed_roots = {'Lean', 'Std'}
    source = file_path.read_text()
    for match in re.finditer(r'^\s*import\s+(\S+)', source, re.MULTILINE):
        module = match.group(1)
        # Only the root component of the module path decides admissibility.
        root = module.split('.', 1)[0]
        if root in allowed_roots:
            continue
        error(f"File imports '{module}' which is outside Lean.*/Std.*\n"
              f"lean-bisect only supports files that import from Lean or Std.\n"
              f"This is because we test against bare toolchains without lake dependencies.")
# -----------------------------------------------------------------------------
# Identifier type detection
# -----------------------------------------------------------------------------
def resolve_sha(short_sha: str) -> str:
    """Expand a possibly-abbreviated commit SHA to its full 40-character form.

    A 40-character input is returned untouched (no git call); anything
    shorter is handed to `git rev-parse`. All failure modes exit via
    error(), so callers may assume a full SHA on return.
    """
    if len(short_sha) == 40:
        return short_sha
    try:
        proc = subprocess.run(
            ['git', 'rev-parse', short_sha],
            capture_output=True,
            text=True,
            timeout=5
        )
        if proc.returncode == 0:
            resolved = proc.stdout.strip()
            if len(resolved) == 40:
                return resolved
        error(f"Cannot resolve SHA '{short_sha}': {proc.stderr.strip() or 'not found in repository'}")
    except subprocess.TimeoutExpired:
        error(f"Timeout resolving SHA '{short_sha}'")
    except FileNotFoundError:
        error("git not found - required for SHA resolution")
def parse_identifier(s: str) -> Tuple[str, str]:
    """Classify an identifier string.

    Returns (kind, value) where kind is 'nightly', 'version', or 'sha'.
    SHA identifiers are expanded to their full 40-character form before
    being returned; anything that matches none of the patterns aborts
    via error().
    """
    if NIGHTLY_PATTERN.match(s):
        return ('nightly', s)
    if VERSION_PATTERN.match(s):
        return ('version', s)
    if SHA_PATTERN.match(s):
        return ('sha', resolve_sha(s))
    error(f"Invalid identifier format: '{s}'\n"
          f"Expected one of:\n"
          f" - nightly-YYYY-MM-DD or nightly-YYYY-MM-DD-revK (e.g., nightly-2024-06-15, nightly-2024-06-15-rev1)\n"
          f" - v4.X.Y or v4.X.Y-rcK (e.g., v4.8.0, v4.9.0-rc1)\n"
          f" - commit SHA (short or full)")
def parse_range(range_str: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    """
    Parse a range string and return (from_id, to_id).

    Syntax:
        FROM...TO or FROM..TO  → (FROM, TO)
        FROM                   → (FROM, None)
        ...TO or ..TO          → (None, TO)
        None                   → (None, None)

    An empty side of the separator yields None for that endpoint. The
    '...' separator is tried before '..' so that 'a...b' is not
    misparsed as ('a', '.b').
    """
    if range_str is None:
        return (None, None)
    # Longest separator first; the two branches were previously duplicated.
    for sep in ('...', '..'):
        if sep in range_str:
            from_part, to_part = range_str.split(sep, 1)
            return (from_part or None, to_part or None)
    # No separator: a single identifier acts as FROM.
    return (range_str, None)
# -----------------------------------------------------------------------------
# GitHub API helpers
# -----------------------------------------------------------------------------
def github_api_request(url: str) -> dict:
    """GET *url* from the GitHub API and return the decoded JSON body.

    Sends a v3 Accept header and, when build_artifact.get_github_token()
    yields one, an Authorization token to raise the rate limit. All HTTP
    and network failures exit via error(), so callers may assume a
    successful response on return.
    """
    headers = {
        'Accept': 'application/vnd.github.v3+json',
        'User-Agent': 'lean-bisect'
    }
    token = build_artifact.get_github_token()
    if token:
        headers['Authorization'] = f'token {token}'
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=30) as response:
            return json.loads(response.read().decode())
    except urllib.error.HTTPError as e:
        # 403 is treated as rate limiting (the common unauthenticated case).
        if e.code == 403:
            error(f"GitHub API rate limit exceeded. Set GITHUB_TOKEN environment variable to increase limit.")
        elif e.code == 404:
            error(f"GitHub resource not found: {url}")
        else:
            error(f"GitHub API error: {e.code} {e.reason}")
    except urllib.error.URLError as e:
        error(f"Network error accessing GitHub API: {e.reason}")
def fetch_nightly_tags() -> List[str]:
    """Return every nightly tag from GitHub, sorted oldest first.

    Pages through the tags API 100 at a time, keeping only names matching
    NIGHTLY_PATTERN. Sorting is by (year, month, day, rev), where a missing
    -revK suffix counts as rev 0 so a base nightly precedes its revisions.
    """
    def sort_key(name: str):
        m = NIGHTLY_PATTERN.match(name)
        if not m:
            return (0, 0, 0, 0)
        return (int(m.group(1)), int(m.group(2)), int(m.group(3)),
                int(m.group(5) or 0))

    collected: List[str] = []
    page = 1
    while True:
        url = f"{GITHUB_API_BASE}/repos/{NIGHTLY_REPO}/tags?per_page=100&page={page}"
        data = github_api_request(url)
        if not data:
            break
        collected.extend(t['name'] for t in data if NIGHTLY_PATTERN.match(t['name']))
        page += 1
        # A short page is the last page; GitHub returns [] past the end.
        if len(data) < 100:
            break
    return sorted(collected, key=sort_key)
def get_commit_for_nightly(nightly: str) -> str:
    """Resolve a nightly tag name to the commit SHA it points at.

    A ref may point either directly at a commit or at an annotated tag
    object; in the latter case the tag is dereferenced one level.
    """
    data = github_api_request(
        f"{GITHUB_API_BASE}/repos/{NIGHTLY_REPO}/git/refs/tags/{nightly}")
    obj = data['object']
    if obj['type'] != 'tag':
        return obj['sha']
    tag_data = github_api_request(
        f"{GITHUB_API_BASE}/repos/{NIGHTLY_REPO}/git/tags/{obj['sha']}")
    return tag_data['object']['sha']
def get_commit_for_version(version: str) -> str:
    """Resolve a version tag in the lean4 repo to its commit SHA.

    Annotated tag objects are dereferenced one level, mirroring
    get_commit_for_nightly.
    """
    data = github_api_request(
        f"{GITHUB_API_BASE}/repos/{LEAN4_REPO}/git/refs/tags/{version}")
    obj = data['object']
    if obj['type'] != 'tag':
        return obj['sha']
    tag_data = github_api_request(
        f"{GITHUB_API_BASE}/repos/{LEAN4_REPO}/git/tags/{obj['sha']}")
    return tag_data['object']['sha']
# -----------------------------------------------------------------------------
# Build functions
# -----------------------------------------------------------------------------
def ensure_toolchain_installed(toolchain: str, work_dir: Path) -> None:
    """Make elan install *toolchain* on demand.

    Writes a lean-toolchain file into *work_dir*, then runs the cheap
    `lake --version` there: elan sees the pinned toolchain and downloads it
    if missing. stdout is discarded (hides "Lake version ...") while stderr
    stays attached so the user sees elan's download progress.
    """
    (work_dir / 'lean-toolchain').write_text(f'leanprover/lean4:{toolchain}\n')
    try:
        subprocess.run(
            ['lake', '--version'],
            cwd=work_dir,
            stdout=subprocess.DEVNULL,
            timeout=600,  # up to 10 minutes for a toolchain download
        )
    except subprocess.TimeoutExpired:
        error(f"Timeout waiting for toolchain {toolchain} to download")
    except FileNotFoundError:
        error("lake not found. Is elan installed and in PATH?")
def build_with_toolchain(
    toolchain: str,
    file_path: Path,
    work_dir: Path,
    timeout: Optional[int] = None
) -> BuildResult:
    """Compile *file_path* under *toolchain* and capture the outcome.

    The toolchain install (which may download) happens first and is not
    subject to *timeout*; only the `lake env lean` invocation is. The file
    is copied into *work_dir* so lake picks up that directory's
    lean-toolchain pin.
    """
    ensure_toolchain_installed(toolchain, work_dir)
    target = work_dir / file_path.name
    shutil.copy(file_path, target)
    try:
        proc = subprocess.run(
            ['lake', 'env', 'lean', target.name],
            cwd=work_dir,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return BuildResult(
            exit_code=-124,
            stdout='',
            stderr=f'Process timed out after {timeout} seconds',
            timed_out=True,
        )
    except FileNotFoundError:
        error("lake not found. Is elan installed and in PATH?")
    return BuildResult(
        exit_code=proc.returncode,
        stdout=proc.stdout,
        stderr=proc.stderr,
        timed_out=False,
    )
# -----------------------------------------------------------------------------
# Bisection
# -----------------------------------------------------------------------------
def format_result(result: BuildResult) -> str:
    """Render a one-word colored status string for a build result."""
    if result.timed_out:
        return color("TIMEOUT", Colors.YELLOW)
    if result.exit_code == 0:
        return color("OK", Colors.GREEN)
    return color(f"FAIL (exit {result.exit_code})", Colors.RED)
def print_verbose_result(result: BuildResult) -> None:
    """Print a build result's captured stdout then stderr, indented."""
    for label, text in ((" stdout:", result.stdout), (" stderr:", result.stderr)):
        if not text.strip():
            continue
        print(color(label, Colors.BOLD))
        for line in text.strip().split('\n'):
            print(f" {line}")
def bisect_nightlies(
    file_path: Path,
    nightlies: List[str],
    work_dir: Path,
    timeout: Optional[int],
    ignore_messages: bool,
    verbose: bool = False,
    file_display: Optional[str] = None
) -> Tuple[Optional[str], Optional[str]]:
    """
    Bisect through nightlies to find where behavior changes.

    Binary-searches `nightlies` (assumed sorted oldest first), comparing
    each candidate's BuildResult signature against the endpoints'.

    Args:
        file_path: Lean file to compile at each nightly.
        nightlies: Candidate nightly tag names, oldest first.
        work_dir: Scratch directory holding lean-toolchain and the file copy.
        timeout: Per-compile timeout in seconds, or None.
        ignore_messages: Compare only exit codes, not stdout/stderr.
        verbose: Echo each build's captured output.
        file_display: Path string shown in the resume command (falls back
            to file_path).

    Returns:
        The adjacent (older, newer) nightlies bracketing the change, or
        (None, None) when the endpoints already behave identically.
    """
    results = {}  # Cache of results: nightly name -> BuildResult
    def test_nightly(nightly: str) -> BuildResult:
        # Build a nightly once, print its status, and memoize the result.
        if nightly not in results:
            print(f" Testing {nightly}... ", end='', flush=True)
            result = build_with_toolchain(nightly, file_path, work_dir, timeout)
            results[nightly] = result
            print(format_result(result))
            if verbose:
                print_verbose_result(result)
        return results[nightly]
    lo, hi = 0, len(nightlies) - 1
    # Test endpoints first: if they agree there is nothing to bisect.
    lo_result = test_nightly(nightlies[lo])
    hi_result = test_nightly(nightlies[hi])
    lo_sig = lo_result.signature(ignore_messages)
    hi_sig = hi_result.signature(ignore_messages)
    if lo_sig == hi_sig:
        return None, None  # No change detected
    # Binary search; invariant: nightlies[lo] matches lo_sig, nightlies[hi] differs.
    while hi - lo > 1:
        mid = (lo + hi) // 2
        mid_result = test_nightly(nightlies[mid])
        mid_sig = mid_result.signature(ignore_messages)
        # Update bounds first
        if mid_sig == lo_sig:
            lo = mid
        else:
            hi = mid
        # Then print the updated range plus a copy-pasteable resume command
        # (useful if the run is interrupted mid-bisection).
        print(f"\n Current range: {nightlies[lo]} .. {nightlies[hi]} ({hi - lo} nightlies)")
        print(f" Resume command: {sys.argv[0]} {file_display or file_path} {nightlies[lo]}...{nightlies[hi]}", end='')
        if timeout:
            print(f" --timeout {timeout}", end='')
        if ignore_messages:
            print(" --ignore-messages", end='')
        print("\n")
    return nightlies[lo], nightlies[hi]
def find_regression_range(
    file_path: Path,
    to_nightly: str,
    all_nightlies: List[str],
    work_dir: Path,
    timeout: Optional[int],
    ignore_messages: bool
) -> Tuple[Optional[str], Optional[str]]:
    """
    Find a nightly range that brackets a behavior change using exponential search.

    Starts from to_nightly and goes back in time, doubling the step each
    iteration (1, 2, 4, ...), so a recent regression is bracketed in
    O(log n) builds without a known-good starting point.

    Args:
        file_path: Lean file to compile at each nightly.
        to_nightly: Known endpoint to search backwards from.
        all_nightlies: All available nightly tags, oldest first.
        work_dir: Scratch directory for toolchain files.
        timeout: Per-compile timeout in seconds, or None.
        ignore_messages: Compare only exit codes, not output text.

    Returns:
        (from_nightly, to_nightly) bracketing the change, or (None, None)
        if even the earliest available nightly behaves like to_nightly.
    """
    results = {}  # Cache of results: nightly name -> BuildResult
    def test_nightly(nightly: str) -> BuildResult:
        # Build a nightly once and memoize its result.
        if nightly not in results:
            print(f" Testing {nightly}... ", end='', flush=True)
            result = build_with_toolchain(nightly, file_path, work_dir, timeout)
            results[nightly] = result
            print(format_result(result))
        return results[nightly]
    # Find position of to_nightly in the list
    if to_nightly not in all_nightlies:
        error(f"Nightly {to_nightly} not found")
    to_idx = all_nightlies.index(to_nightly)
    # Test the target nightly first
    info(f"Testing target nightly {to_nightly}...")
    to_result = test_nightly(to_nightly)
    to_sig = to_result.signature(ignore_messages)
    # Exponential search backwards
    step = 1
    prev_idx = to_idx
    while True:
        # Calculate target index, going back by 'step' nightlies
        target_idx = to_idx - step
        if target_idx < 0:
            # We've gone past the beginning of available nightlies
            target_idx = 0
        if target_idx == prev_idx:
            # Clamping left us where we already were: index 0 was already
            # tested last iteration and matched, so there is nothing older.
            warn("Reached earliest available nightly without finding behavior change")
            return None, None
        from_nightly = all_nightlies[target_idx]
        print()
        info(f"Testing {from_nightly} (step={step})...")
        from_result = test_nightly(from_nightly)
        from_sig = from_result.signature(ignore_messages)
        if from_sig != to_sig:
            # Found a difference! The regression is between target_idx and prev_idx
            success(f"Found behavior change between {from_nightly} and {all_nightlies[prev_idx]}")
            return from_nightly, all_nightlies[prev_idx]
        if target_idx == 0:
            warn("Reached earliest available nightly without finding behavior change")
            return None, None
        prev_idx = target_idx
        step *= 2
# -----------------------------------------------------------------------------
# Version tag handling
# -----------------------------------------------------------------------------
def find_closest_nightlies_for_version(version: str, all_nightlies: List[str]) -> Tuple[Optional[str], Optional[str]]:
    """
    Find the closest nightlies before and after a version's branch point from master.

    For v4.X.Y, finds where releases/v4.X.0 diverged from master, since nightlies
    are built from master. This means v4.25.0, v4.25.1, v4.25.2 all map to the
    same nightly range.

    Args:
        version: Tag matching VERSION_PATTERN (e.g. v4.25.2, v4.9.0-rc1).
        all_nightlies: Nightly tag names, oldest first.

    Returns:
        (nightly_before, nightly_after); either side may be None when no
        nightly exists on that side of the branch-point date.
    """
    # Parse version to get the release branch name (v4.X.Y -> releases/v4.X.0)
    # VERSION_PATTERN is r'^v4\.(\d+)\.(\d+)(-rc\d+)?$'
    # So group(1) is the minor version (X), group(2) is patch (Y)
    match = VERSION_PATTERN.match(version)
    if not match:
        error(f"Invalid version format: {version}")
    minor_version = match.group(1)  # e.g., "25" from v4.25.2
    release_branch = f"releases/v4.{minor_version}.0"
    info(f"Version {version} is on branch {release_branch}")
    # Find merge-base between release branch and master
    # This is where the release branch diverged from master
    try:
        url = f"{GITHUB_API_BASE}/repos/{LEAN4_REPO}/compare/master...{release_branch}"
        compare_data = github_api_request(url)
        merge_base_sha = compare_data['merge_base_commit']['sha']
        merge_base_date = compare_data['merge_base_commit']['commit']['committer']['date'][:10]
        info(f"Branch {release_branch} diverged from master at {merge_base_sha[:12]} ({merge_base_date})")
    # Broad on purpose: any response-shape surprise falls back to the tag
    # date. (HTTP errors exit inside github_api_request and never get here.)
    except Exception as e:
        # Fallback: try to get the commit date of the version tag directly
        warn(f"Could not find merge base for {release_branch}, falling back to tag date")
        version_sha = get_commit_for_version(version)
        url = f"{GITHUB_API_BASE}/repos/{LEAN4_REPO}/commits/{version_sha}"
        commit_data = github_api_request(url)
        merge_base_date = commit_data['commit']['committer']['date'][:10]
    # Find nightlies around the merge base date. ISO YYYY-MM-DD dates order
    # correctly under plain string comparison, so no date parsing is needed.
    nightly_before = None
    nightly_after = None
    for nightly in all_nightlies:
        nightly_match = NIGHTLY_PATTERN.match(nightly)
        if nightly_match:
            nightly_date = f"{nightly_match.group(1)}-{nightly_match.group(2)}-{nightly_match.group(3)}"
            if nightly_date <= merge_base_date:
                nightly_before = nightly  # keeps advancing -> latest such nightly
            elif nightly_date > merge_base_date and nightly_after is None:
                nightly_after = nightly
    return nightly_before, nightly_after
def handle_version_endpoints(
    file_path: Path,
    from_id: str,
    to_id: str,
    from_type: str,
    to_type: str,
    timeout: Optional[int],
    ignore_messages: bool
) -> Tuple[str, str, List[str]]:
    """Translate version-tag endpoints into nightly endpoints.

    A version FROM maps to the latest nightly at or before the version's
    branch point from master; a version TO maps to the earliest nightly
    after it. Non-version endpoints pass through unchanged. Exits via
    error() if no suitable nightly exists on the required side.

    Returns (from_nightly, to_nightly, all_nightlies).
    """
    all_nightlies = fetch_nightly_tags()
    resolved_from, resolved_to = from_id, to_id
    if from_type == 'version':
        before, _after = find_closest_nightlies_for_version(from_id, all_nightlies)
        if not before:
            error(f"Could not find a nightly before version {from_id}")
        info(f"For FROM={from_id}, using nightly {before} (latest nightly before the version)")
        resolved_from = before
    if to_type == 'version':
        _before, after = find_closest_nightlies_for_version(to_id, all_nightlies)
        if not after:
            error(f"Could not find a nightly after version {to_id}")
        info(f"For TO={to_id}, using nightly {after} (earliest nightly after the version)")
        resolved_to = after
    return resolved_from, resolved_to, all_nightlies
# -----------------------------------------------------------------------------
# Commit SHA bisection
# -----------------------------------------------------------------------------
def find_lean4_repo() -> Optional[Path]:
    """Locate a local lean4 checkout to use as a git clone reference.

    Looks at the current directory (stepping up one level when run from a
    script/ subdirectory, where lean-bisect lives) and checks whether its
    git origin URL mentions 'lean4'.

    Returns:
        The repository path, or None if no suitable checkout is found.
        This is strictly best-effort: any git failure yields None.
    """
    cwd = Path.cwd()
    # If we're in a script/ directory, step up one level
    if cwd.name == 'script':
        cwd = cwd.parent
    # Check if this directory is a lean4 repo
    try:
        result = subprocess.run(
            ['git', 'remote', 'get-url', 'origin'],
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=5  # don't let a wedged git hang the whole bisection
        )
        if result.returncode == 0 and 'lean4' in result.stdout.lower():
            return cwd
    except (OSError, subprocess.SubprocessError):
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit. Missing git (FileNotFoundError is an OSError) or a
        # timeout (TimeoutExpired is a SubprocessError) just means we fall
        # back to cloning from scratch.
        pass
    return None
def bisect_commits(
    file_path: Path,
    from_sha: str,
    to_sha: str,
    work_dir: Path,
    timeout: Optional[int],
    ignore_messages: bool,
    file_display: Optional[str] = None
) -> None:
    """Bisect through commits on master.

    Clones lean4 into a temp directory (using a local checkout as a git
    reference when available), lists the commits in from_sha..to_sha, and
    binary-searches them. Each probed commit's toolchain comes from a CI
    artifact when possible, otherwise from a local cmake/make build;
    unbuildable commits are skipped by probing outward from the midpoint.

    Args:
        file_path: Lean file to test at each commit.
        from_sha / to_sha: Full 40-char endpoint SHAs (needed for CI
            artifact lookup); both must be ancestors of origin/master.
        work_dir: Scratch directory for lean-toolchain and the file copy.
        timeout: Per-compile timeout in seconds, or None.
        ignore_messages: Compare only exit codes, not output text.
        file_display: Path string shown in the resume command.

    Prints the culprit commit (or the narrowest reachable range) and
    returns None; the clone directory is always removed on exit.
    """
    build_dir = Path(tempfile.mkdtemp(prefix='lean-bisect-build-'))
    # Try to find a local lean4 repo to use as reference (speeds up clone significantly)
    local_repo = find_lean4_repo()
    try:
        if local_repo:
            info(f"Found local lean4 repo at {local_repo}, using as reference...")
            info(f"Cloning to {build_dir}...")
            subprocess.run(
                ['git', 'clone', '--reference', str(local_repo),
                 f'https://github.com/{LEAN4_REPO}.git', str(build_dir)],
                check=True
            )
        else:
            info(f"No local lean4 repo found, cloning from scratch to {build_dir}...")
            info("(Tip: run from within a lean4 checkout to speed this up)")
            subprocess.run(
                ['git', 'clone', f'https://github.com/{LEAN4_REPO}.git', str(build_dir)],
                check=True
            )
        # Fetch latest from origin
        subprocess.run(
            ['git', 'fetch', 'origin'],
            cwd=build_dir,
            check=True
        )
        # Verify commits are on master
        for sha in [from_sha, to_sha]:
            result = subprocess.run(
                ['git', 'merge-base', '--is-ancestor', sha, 'origin/master'],
                cwd=build_dir,
                capture_output=True
            )
            if result.returncode != 0:
                error(f"Commit {sha} is not an ancestor of master")
        # Get list of commits between from and to (need full SHAs for CI artifact lookup)
        result = subprocess.run(
            ['git', 'log', '--format=%H', '--reverse', f'{from_sha}..{to_sha}'],
            cwd=build_dir,
            capture_output=True,
            text=True,
            check=True
        )
        # git log's A..B excludes A, so prepend it; list ends up oldest-first.
        commits = [from_sha]  # Include the from commit
        for line in result.stdout.strip().split('\n'):
            if line:
                commits.append(line.strip())
        info(f"Found {len(commits)} commits to bisect")
        print()
        results = {}  # sha -> BuildResult
        failed_builds = set()  # commits (SHAs) that failed to build
        no_artifact = set()  # commits (SHAs) with no CI artifact available
        # Check if we can use CI artifacts
        use_artifacts = (build_artifact.check_gh_available() and
                         build_artifact.check_zstd_support() and
                         build_artifact.get_artifact_name() is not None)
        if use_artifacts:
            info("CI artifact download available (will try before building from source)")
        else:
            # Explain which prerequisite is missing, in check order.
            if not build_artifact.check_gh_available():
                warn("gh CLI not available or not authenticated; will build from source")
            elif not build_artifact.check_zstd_support():
                warn("tar does not support zstd; will build from source")
            elif build_artifact.get_artifact_name() is None:
                warn("No CI artifacts available for this platform; will build from source")
        print()
        def get_toolchain_for_commit(sha: str) -> Optional[Path]:
            """
            Get a toolchain path for testing a commit.
            Tries CI artifact first, falls back to source build.
            Returns toolchain path or None if unavailable.
            """
            # Try CI artifact first (unless we know there isn't one)
            if use_artifacts and sha not in no_artifact:
                print(f" Checking {sha[:12]}... ", end='', flush=True)
                # Check if already cached
                if build_artifact.is_cached(sha):
                    print(color("cached", Colors.GREEN))
                    return build_artifact.get_cache_path(sha)
                artifact_result = build_artifact.download_ci_artifact(sha)
                if artifact_result is CI_FAILED:
                    # CI itself failed at this commit; a local build would
                    # almost certainly fail the same way.
                    print(color("CI failed, skipping", Colors.YELLOW))
                    return None  # Don't bother building locally
                elif artifact_result:
                    print(color("using CI artifact", Colors.GREEN))
                    return artifact_result
                else:
                    no_artifact.add(sha)
                    print("no artifact, ", end='', flush=True)
            else:
                print(f" Checking out {sha[:12]}... ", end='', flush=True)
            # Fall back to building from source
            return build_lean_at_commit_internal(sha)
        def build_lean_at_commit_internal(sha: str) -> Optional[Path]:
            """Build lean at a specific commit. Returns stage1 path if successful, None otherwise."""
            # NOTE(review): this conditional is dead code — both branches fall
            # through to the checkout below; looks like a leftover from an
            # earlier refactor of the progress printing.
            # Only print "Checking out" if we didn't already (when artifact path printed it)
            if sha not in no_artifact or not use_artifacts:
                pass  # Already printed in get_toolchain_for_commit
            subprocess.run(
                ['git', 'checkout', '-q', sha],
                cwd=build_dir,
                check=True
            )
            # Configure cmake if build/release doesn't exist
            build_release = build_dir / 'build' / 'release'
            if not build_release.exists():
                print("configuring... ", end='', flush=True)
                build_release.mkdir(parents=True, exist_ok=True)
                result = subprocess.run(
                    ['cmake', '../..', '-DCMAKE_BUILD_TYPE=Release'],
                    cwd=build_release,
                    capture_output=True,
                    text=True
                )
                if result.returncode != 0:
                    print(color("CMAKE FAILED", Colors.YELLOW))
                    failed_builds.add(sha)
                    return None
            print("building... ", end='', flush=True)
            # Build lean (incremental: the build dir is reused across commits)
            result = subprocess.run(
                ['make', '-j', '-C', 'build/release'],
                cwd=build_dir,
                capture_output=True,
                text=True
            )
            if result.returncode != 0:
                print(color("BUILD FAILED", Colors.YELLOW))
                failed_builds.add(sha)
                return None
            print(color("built", Colors.GREEN))
            return build_dir / 'build' / 'release' / 'stage1'
        def test_commit(sha: str) -> Optional[BuildResult]:
            """Test the file against a commit. Returns None if toolchain unavailable."""
            if sha in results:
                return results[sha]
            if sha in failed_builds:
                return None
            toolchain_path = get_toolchain_for_commit(sha)
            if toolchain_path is None:
                return None
            # Link as elan toolchain (so `lake` resolves it by name below)
            toolchain_name = f'lean-bisect-{sha[:12]}'
            subprocess.run(
                ['elan', 'toolchain', 'link', toolchain_name, str(toolchain_path)],
                check=True,
                capture_output=True
            )
            try:
                # Write lean-toolchain and test
                (work_dir / 'lean-toolchain').write_text(f'{toolchain_name}\n')
                shutil.copy(file_path, work_dir / file_path.name)
                print(f" Testing {sha[:12]}... ", end='', flush=True)
                try:
                    result = subprocess.run(
                        ['lake', 'env', 'lean', file_path.name],
                        cwd=work_dir,
                        capture_output=True,
                        timeout=timeout,
                        text=True
                    )
                    build_result = BuildResult(
                        exit_code=result.returncode,
                        stdout=result.stdout,
                        stderr=result.stderr,
                        timed_out=False
                    )
                except subprocess.TimeoutExpired:
                    # Same -124 sentinel used by BuildResult.signature.
                    build_result = BuildResult(
                        exit_code=-124,
                        stdout='',
                        stderr=f'Process timed out after {timeout} seconds',
                        timed_out=True
                    )
                results[sha] = build_result
                print(format_result(build_result))
                return build_result
            finally:
                # Unlink toolchain so transient names don't accumulate in elan
                subprocess.run(
                    ['elan', 'toolchain', 'uninstall', toolchain_name],
                    capture_output=True
                )
        # Test endpoints
        lo, hi = 0, len(commits) - 1
        lo_result = test_commit(commits[lo])
        if lo_result is None:
            error(f"Could not build at starting commit {commits[lo][:12]}")
        hi_result = test_commit(commits[hi])
        if hi_result is None:
            error(f"Could not build at ending commit {commits[hi][:12]}")
        lo_sig = lo_result.signature(ignore_messages)
        hi_sig = hi_result.signature(ignore_messages)
        if lo_sig == hi_sig:
            info("No behavior change detected between the endpoints.")
            return
        # Binary search; invariant: commits[lo] matches lo_sig, commits[hi] differs.
        while hi - lo > 1:
            mid = (lo + hi) // 2
            mid_result = test_commit(commits[mid])
            if mid_result is None:
                # Build failed, try to find another commit nearby
                # Search outward from mid for a buildable commit
                found = False
                for offset in range(1, hi - lo):
                    for candidate in [mid - offset, mid + offset]:
                        # NOTE(review): failed_builds holds SHA strings while
                        # candidate is an index, so the membership test never
                        # filters anything; test_commit re-checks by SHA, so
                        # this only costs a redundant call.
                        if lo < candidate < hi and candidate not in failed_builds:
                            mid_result = test_commit(commits[candidate])
                            if mid_result is not None:
                                mid = candidate
                                found = True
                                break
                    if found:
                        break
                if not found:
                    warn("No buildable commits found in range, cannot narrow further")
                    break
            mid_sig = mid_result.signature(ignore_messages)
            if mid_sig == lo_sig:
                lo = mid
            else:
                hi = mid
            print(f"\n Current range: {commits[lo][:12]} .. {commits[hi][:12]} ({hi - lo} commits)")
            print(f" Resume command: {sys.argv[0]} {file_display or file_path} {commits[lo]}...{commits[hi]}", end='')
            if timeout:
                print(f" --timeout {timeout}", end='')
            if ignore_messages:
                print(" --ignore-messages", end='')
            print("\n")
        # Report result
        print()
        if hi - lo == 1:
            success(f"Behavior change introduced in commit {commits[hi][:12]}")
            print()
            # Show commit info
            result = subprocess.run(
                ['git', 'log', '-1', '--oneline', commits[hi]],
                cwd=build_dir,
                capture_output=True,
                text=True
            )
            if result.returncode == 0:
                print(f" {result.stdout.strip()}")
            print()
            print(f" Full SHA: {commits[hi]}")
            print(f" View on GitHub: https://github.com/{LEAN4_REPO}/commit/{commits[hi]}")
        else:
            warn(f"Narrowed to range {commits[lo][:12]} .. {commits[hi][:12]} ({hi - lo} commits)")
            if failed_builds:
                warn(f"Note: {len(failed_builds)} commits failed to build and were skipped")
    finally:
        # Clean up the clone even on error/KeyboardInterrupt
        shutil.rmtree(build_dir, ignore_errors=True)
# -----------------------------------------------------------------------------
# Selftest
# -----------------------------------------------------------------------------
def run_selftest() -> None:
    """Run the built-in selftest by shelling out to lean-bisect.

    Re-invokes this script (via sys.executable) against the bundled
    lean-bisect-test.lean over a fixed nightly range, streams its output
    while keeping a bounded tail for validation, and checks that the
    expected "behavior change" line appears. Exits 0 on pass, 1 on fail.
    """
    script_path = Path(__file__).resolve()
    script_dir = script_path.parent
    test_file_abs = script_dir / 'lean-bisect-test.lean'
    if not test_file_abs.exists():
        error(f"Selftest file not found: {test_file_abs}")
    # Use relative paths for nicer output
    try:
        test_file = os.path.relpath(test_file_abs)
        script_rel = os.path.relpath(script_path)
    except ValueError:
        # On Windows, relpath can fail across drives
        test_file = str(test_file_abs)
        script_rel = str(script_path)
    print(color("Running lean-bisect selftest...", Colors.BOLD))
    print(f"Test file: {test_file}")
    print(f"Running: {script_rel} {test_file} nightly-2025-11-01...nightly-2025-11-15 --timeout 10 --ignore-messages -v")
    print()
    # Run lean-bisect as a subprocess, streaming output
    cmd = [
        sys.executable, str(script_path),
        str(test_file),
        'nightly-2025-11-01...nightly-2025-11-15',
        '--timeout', '10',
        '--ignore-messages',
        '-v'
    ]
    # Capture last N lines for validation (bounded ring buffer)
    output_lines = []
    max_lines = 50
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr so warnings are captured too
        text=True,
        bufsize=1  # line-buffered so progress appears promptly
    )
    # Stream output to user and capture it
    for line in process.stdout:
        print(line, end='')
        output_lines.append(line.rstrip())
        if len(output_lines) > max_lines:
            output_lines.pop(0)
    process.wait()
    print()
    print(color("=" * 60, Colors.BOLD))
    print(color("Selftest validation:", Colors.BOLD))
    print()
    # Check for expected output in the captured tail
    output_text = '\n'.join(output_lines)
    expected_change = "Behavior change detected between nightly-2025-11-06 and nightly-2025-11-07"
    if expected_change in output_text:
        success("Selftest PASSED!")
        success(f"Found expected: {expected_change}")
        sys.exit(0)
    else:
        print(color("Selftest FAILED!", Colors.RED))
        print(f"Expected to find: {expected_change}")
        print(f"Last {len(output_lines)} lines of output:")
        for line in output_lines[-20:]:
            print(f" {line}")
        sys.exit(1)
# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------
def main() -> None:
    """Command-line entry point.

    Parses the CLI arguments and dispatches to one of three bisection
    modes based on the endpoint identifiers:

    * nightly mode  - both endpoints are nightly tags (binary search over
      nightlies, optionally continuing into per-commit bisection),
    * version mode  - either endpoint is a version tag (converted to an
      equivalent nightly range first),
    * commit mode   - both endpoints are commit SHAs (per-commit bisection).

    Also handles the utility flags --clear-cache and --selftest, which
    short-circuit before any bisection.

    NOTE(review): error()/parser.error() appear to terminate the process
    on invalid input (code after them assumes they do not return) — confirm
    against their definitions earlier in the file.
    """
    parser = argparse.ArgumentParser(
        description='Bisect Lean toolchain versions to find where behavior changes.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Range Syntax:
  FROM..TO              Bisect between FROM and TO
  FROM                  Start from FROM, bisect to latest nightly
  ..TO                  Bisect to TO, search backwards for regression start

If no range given, searches backwards from latest nightly to find regression.

Identifier Formats:
  nightly-YYYY-MM-DD       Nightly build date (e.g., nightly-2024-06-15)
  nightly-YYYY-MM-DD-revK  Revised nightly (e.g., nightly-2024-06-15-rev1)
      Uses pre-built toolchains from leanprover/lean4-nightly.
      Fast: downloads via elan (~30s each).

  v4.X.Y or v4.X.Y-rcN     Version tag (e.g., v4.8.0, v4.9.0-rc1)
      Converts to equivalent nightly range.

  Commit SHA               Git commit hash (short or full, e.g., abc123def)
      Bisects individual commits between two points.
      Tries CI artifacts first (~30s), falls back to building (~2-5min).
      Commits with failed CI builds are automatically skipped.
      Artifacts cached in ~/.cache/lean_build_artifact/

Bisection Modes:
  Nightly mode: Both endpoints are nightly dates.
      Binary search through nightlies to find the day behavior changed.
      Then automatically continues to bisect individual commits.
      Use --nightly-only to stop after finding the nightly range.

  Version mode: Either endpoint is a version tag.
      Converts to equivalent nightly range and bisects.

  Commit mode: Both endpoints are commit SHAs.
      Binary search through individual commits on master.
      Output: "Behavior change introduced in commit abc123"

Examples:
  # Simplest: just provide the file, finds the regression automatically
  lean-bisect test.lean

  # Specify an endpoint if you know roughly when it broke
  lean-bisect test.lean ..nightly-2024-06-01

  # Full manual control over the range
  lean-bisect test.lean nightly-2024-01-01..nightly-2024-06-01

  # Only find the nightly range, don't continue to commit bisection
  lean-bisect test.lean nightly-2024-01-01..nightly-2024-06-01 --nightly-only

  # Add a timeout (kills slow/hanging tests)
  lean-bisect test.lean --timeout 30

  # Bisect commits directly (if you already know the commit range)
  lean-bisect test.lean abc1234..def5678

  # Only compare exit codes, ignore output differences
  lean-bisect test.lean --ignore-messages

  # Clear downloaded CI artifacts to free disk space
  lean-bisect --clear-cache
"""
    )
    parser.add_argument('file', nargs='?', help='Lean file to test (must only import Lean.* or Std.*)')
    parser.add_argument('range', nargs='?', metavar='RANGE',
                        help='Range to bisect: FROM..TO, FROM, or ..TO')
    parser.add_argument('--timeout', type=int, metavar='SEC',
                        help='Timeout in seconds for each test run')
    parser.add_argument('--ignore-messages', action='store_true',
                        help='Compare only exit codes, ignore stdout/stderr differences')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Show stdout/stderr from each test')
    parser.add_argument('--selftest', action='store_true',
                        help='Run built-in selftest to verify lean-bisect works')
    parser.add_argument('--clear-cache', action='store_true',
                        help='Clear CI artifact cache (~600MB per commit) and exit')
    parser.add_argument('--nightly-only', action='store_true',
                        help='Stop after finding nightly range (don\'t bisect individual commits)')
    args = parser.parse_args()

    # Show help if no arguments provided
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    # Handle cache clearing
    if args.clear_cache:
        if ARTIFACT_CACHE.exists():
            # Total the file sizes before deleting so we can report how much was freed.
            size = sum(f.stat().st_size for f in ARTIFACT_CACHE.rglob('*') if f.is_file())
            shutil.rmtree(ARTIFACT_CACHE)
            info(f"Cleared cache at {ARTIFACT_CACHE} ({size / 1024 / 1024:.1f} MB)")
        else:
            info(f"Cache directory does not exist: {ARTIFACT_CACHE}")
        return

    # Handle selftest
    if args.selftest:
        run_selftest()
        return

    # Validate arguments
    if not args.file:
        parser.error("file is required (unless using --selftest)")
    file_arg = args.file  # Preserve original (possibly relative) path for resume commands
    file_path = Path(args.file).resolve()
    if not file_path.exists():
        error(f"File not found: {file_path}")

    # Check imports
    check_imports(file_path)

    # Parse range syntax
    all_nightlies = None  # Lazy load: only fetched from GitHub when actually needed
    from_id, to_id = parse_range(args.range)

    if to_id:
        to_type, to_val = parse_identifier(to_id)
    else:
        # Default to most recent nightly
        info("No endpoint specified, fetching latest nightly...")
        all_nightlies = fetch_nightly_tags()
        to_val = all_nightlies[-1]
        to_type = 'nightly'
        info(f"Using latest nightly: {to_val}")

    if from_id:
        from_type, from_val = parse_identifier(from_id)
    else:
        # Will use exponential search to find regression range
        from_type, from_val = None, None

    # Validate --nightly-only: only meaningful when the endpoints are nightlies
    if args.nightly_only:
        if from_val is not None and from_type != 'nightly':
            error("--nightly-only requires FROM to be a nightly identifier (nightly-YYYY-MM-DD or nightly-YYYY-MM-DD-revK)")
        if to_type != 'nightly':
            error("--nightly-only requires TO to be a nightly identifier (nightly-YYYY-MM-DD or nightly-YYYY-MM-DD-revK)")

    if from_val:
        info(f"From: {from_val} ({from_type})")
    else:
        info("From: (will search backwards to find regression)")
    info(f"To: {to_val} ({to_type})")
    print()

    # Handle different combinations
    if from_type == 'version' or to_type == 'version':
        # Version tag handling - convert to nightlies and continue
        from_val, to_val, all_nightlies = handle_version_endpoints(
            file_path, from_val, to_val,
            from_type, to_type,
            args.timeout, args.ignore_messages
        )
        from_type, to_type = 'nightly', 'nightly'
        print()
        # Fall through to nightly bisection below
    elif from_type == 'sha' and to_type == 'sha':
        # Commit SHA bisection: has its own temp dir and returns immediately
        work_dir = Path(tempfile.mkdtemp(prefix='lean-bisect-'))
        try:
            bisect_commits(
                file_path, from_val, to_val,
                work_dir, args.timeout, args.ignore_messages,
                file_display=file_arg
            )
        finally:
            shutil.rmtree(work_dir, ignore_errors=True)
        return

    # Nightly bisection (with optional exponential search for FROM).
    # At this point any non-None endpoint type must be 'nightly'.
    if from_type is not None and from_type != 'nightly':
        error("Mixed identifier types not supported. Both FROM and TO must be nightlies, versions, or SHAs.")
    if to_type != 'nightly':
        error("Mixed identifier types not supported. Both FROM and TO must be nightlies, versions, or SHAs.")

    # Fetch all nightly tags (if not already fetched)
    if all_nightlies is None:
        info("Fetching nightly tags from GitHub...")
        all_nightlies = fetch_nightly_tags()

    # Validate to_val exists
    if to_val not in all_nightlies:
        error(f"Nightly {to_val} not found. Check https://github.com/{NIGHTLY_REPO}/tags")

    # Create temp directory (cleaned up in the finally below)
    work_dir = Path(tempfile.mkdtemp(prefix='lean-bisect-'))
    try:
        # If FROM not specified, use exponential search to find regression range
        if from_val is None:
            info("Searching backwards to find where behavior changed...")
            print()
            from_val, to_val = find_regression_range(
                file_path, to_val, all_nightlies,
                work_dir, args.timeout, args.ignore_messages
            )
            if from_val is None:
                info("Could not find a behavior change in available nightlies.")
                return
            print()
            info(f"Narrowed to range: {from_val} .. {to_val}")
            print()

        # Validate from_val exists
        if from_val not in all_nightlies:
            error(f"Nightly {from_val} not found. Check https://github.com/{NIGHTLY_REPO}/tags")

        # Get range (inclusive slice of the chronologically sorted tag list)
        from_idx = all_nightlies.index(from_val)
        to_idx = all_nightlies.index(to_val)
        if from_idx > to_idx:
            error(f"FROM ({from_val}) must be before TO ({to_val})")
        nightlies = all_nightlies[from_idx:to_idx + 1]

        info(f"Bisecting range: {from_val} .. {to_val} ({len(nightlies)} nightlies)")
        print()

        lo, hi = bisect_nightlies(
            file_path, nightlies, work_dir,
            args.timeout, args.ignore_messages,
            verbose=args.verbose,
            file_display=file_arg
        )

        print()
        if lo is None:
            info("No behavior change detected between the endpoints.")
            info("Both toolchains produce the same output.")
            return

        success(f"Behavior change detected between {lo} and {hi}")
        print()

        # Get commit SHAs for the nightlies so we can narrow to a single commit
        try:
            info("Fetching commit SHAs for these nightlies...")
            lo_sha = get_commit_for_nightly(lo)
            hi_sha = get_commit_for_nightly(hi)
            print(f" {lo} -> {lo_sha[:12]}")
            print(f" {hi} -> {hi_sha[:12]}")
            print()
            if args.nightly_only:
                # Just show the command to run manually
                print("To identify the exact commit that introduced the change, run:")
                cmd = f" {sys.argv[0]} {file_arg} {lo_sha}...{hi_sha}"
                if args.timeout:
                    cmd += f" --timeout {args.timeout}"
                if args.ignore_messages:
                    cmd += " --ignore-messages"
                print(color(cmd, Colors.BOLD))
            else:
                # Automatically continue to commit bisection
                info("Continuing to bisect individual commits...")
                print()
                bisect_commits(
                    file_path, lo_sha, hi_sha,
                    work_dir, args.timeout, args.ignore_messages,
                    file_display=file_arg
                )
        except Exception as e:
            # Best-effort: the nightly-level result above is still useful
            # even if the SHA lookup (network/API) fails.
            warn(f"Could not fetch commit SHAs: {e}")
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)
# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    main()