Files
lean4/script/build_artifact.py
Kim Morrison 2236122411 feat: add build_artifact.py for downloading CI artifacts (#11735)
This PR adds a standalone script to download pre-built CI artifacts from
GitHub Actions. This allows us to quickly switch commits without
rebuilding.

**Features:**
- Downloads artifacts for current HEAD or specified commit (`--sha`)
- Caches in `~/.cache/lean_build_artifact/` for reuse
- Platform detection (Linux/macOS, x86_64/aarch64)

**Usage:**
```
build_artifact.py                   # Download for current HEAD
build_artifact.py --sha abc1234     # Download for specific commit
build_artifact.py --clear-cache     # Clear cache
```

This is extracted to be shared with `lean-bisect`.

🤖 Prepared with Claude Code

Co-authored-by: Claude <noreply@anthropic.com>
2025-12-19 04:09:23 +00:00

442 lines
15 KiB
Python
Executable File

#!/usr/bin/env python3
"""
build_artifact.py: Download pre-built CI artifacts for a Lean commit.
Usage:
build_artifact.py # Download artifact for current HEAD
build_artifact.py --sha abc1234 # Download artifact for specific commit
build_artifact.py --clear-cache # Clear artifact cache
This script downloads pre-built binaries from GitHub Actions CI runs,
which is much faster than building from source (~30s vs 2-5min).
Artifacts are cached in ~/.cache/lean_build_artifact/ for reuse.
"""
import argparse
import json
import os
import platform
import shutil
import subprocess
import sys
import urllib.request
import urllib.error
from pathlib import Path
from typing import Optional
# Constants
GITHUB_API_BASE = "https://api.github.com"
LEAN4_REPO = "leanprover/lean4"
# CI artifact cache
CACHE_DIR = Path.home() / '.cache' / 'lean_build_artifact'
ARTIFACT_CACHE = CACHE_DIR
# Sentinel value indicating CI failed (don't bother building locally)
CI_FAILED = object()
# ANSI colors for terminal output
class Colors:
RED = '\033[91m'
GREEN = '\033[92m'
YELLOW = '\033[93m'
BLUE = '\033[94m'
BOLD = '\033[1m'
RESET = '\033[0m'
def color(text: str, c: str) -> str:
"""Apply color to text if stdout is a tty."""
if sys.stdout.isatty():
return f"{c}{text}{Colors.RESET}"
return text
def error(msg: str) -> None:
"""Print error message and exit."""
print(color(f"Error: {msg}", Colors.RED), file=sys.stderr)
sys.exit(1)
def warn(msg: str) -> None:
"""Print warning message."""
print(color(f"Warning: {msg}", Colors.YELLOW), file=sys.stderr)
def info(msg: str) -> None:
"""Print info message."""
print(color(msg, Colors.BLUE), file=sys.stderr)
def success(msg: str) -> None:
"""Print success message."""
print(color(msg, Colors.GREEN), file=sys.stderr)
# -----------------------------------------------------------------------------
# Platform detection
# -----------------------------------------------------------------------------
def get_artifact_name() -> Optional[str]:
"""Get CI artifact name for current platform."""
system = platform.system()
machine = platform.machine()
if system == 'Darwin':
if machine == 'arm64':
return 'build-macOS aarch64'
return 'build-macOS' # Intel
elif system == 'Linux':
if machine == 'aarch64':
return 'build-Linux aarch64'
return 'build-Linux release'
# Windows not supported for CI artifact download
return None
# -----------------------------------------------------------------------------
# GitHub API helpers
# -----------------------------------------------------------------------------
_github_token_warning_shown = False
def get_github_token() -> Optional[str]:
"""Get GitHub token from environment or gh CLI."""
global _github_token_warning_shown
# Check environment variable first
token = os.environ.get('GITHUB_TOKEN')
if token:
return token
# Try to get token from gh CLI
try:
result = subprocess.run(
['gh', 'auth', 'token'],
capture_output=True,
text=True,
timeout=5
)
if result.returncode == 0 and result.stdout.strip():
return result.stdout.strip()
except (FileNotFoundError, subprocess.TimeoutExpired):
pass
# Warn once if no token available
if not _github_token_warning_shown:
_github_token_warning_shown = True
warn("No GitHub authentication found. API rate limits may apply.")
warn("Run 'gh auth login' or set GITHUB_TOKEN to avoid rate limiting.")
return None
def github_api_request(url: str) -> dict:
"""Make a GitHub API request and return JSON response."""
headers = {
'Accept': 'application/vnd.github.v3+json',
'User-Agent': 'build-artifact'
}
token = get_github_token()
if token:
headers['Authorization'] = f'token {token}'
req = urllib.request.Request(url, headers=headers)
try:
with urllib.request.urlopen(req, timeout=30) as response:
return json.loads(response.read().decode())
except urllib.error.HTTPError as e:
if e.code == 403:
error(f"GitHub API rate limit exceeded. Set GITHUB_TOKEN environment variable to increase limit.")
elif e.code == 404:
error(f"GitHub resource not found: {url}")
else:
error(f"GitHub API error: {e.code} {e.reason}")
except urllib.error.URLError as e:
error(f"Network error accessing GitHub API: {e.reason}")
# -----------------------------------------------------------------------------
# CI artifact cache functions
# -----------------------------------------------------------------------------
def get_cache_path(sha: str) -> Path:
"""Get cache directory for a commit's artifact."""
return ARTIFACT_CACHE / sha[:12]
def is_cached(sha: str) -> bool:
"""Check if artifact for this commit is already cached and valid."""
cache_path = get_cache_path(sha)
return cache_path.exists() and (cache_path / 'bin' / 'lean').exists()
def check_zstd_support() -> bool:
"""Check if tar supports zstd compression."""
try:
result = subprocess.run(
['tar', '--zstd', '--version'],
capture_output=True,
timeout=5
)
return result.returncode == 0
except (subprocess.TimeoutExpired, FileNotFoundError):
return False
def check_gh_available() -> bool:
"""Check if gh CLI is available and authenticated."""
try:
result = subprocess.run(
['gh', 'auth', 'status'],
capture_output=True,
timeout=10
)
return result.returncode == 0
except (subprocess.TimeoutExpired, FileNotFoundError):
return False
def download_ci_artifact(sha: str, quiet: bool = False):
"""
Try to download CI artifact for a commit.
Returns:
- Path to extracted toolchain directory if available
- CI_FAILED sentinel if CI run failed (don't bother building locally)
- None if no artifact available but local build might work
"""
# Check cache first
if is_cached(sha):
return get_cache_path(sha)
artifact_name = get_artifact_name()
if artifact_name is None:
return None # Unsupported platform
cache_path = get_cache_path(sha)
try:
# Query for CI workflow run for this commit, including status
# Note: Query parameters must be in the URL for GET requests
result = subprocess.run(
['gh', 'api', f'repos/{LEAN4_REPO}/actions/runs?head_sha={sha}&per_page=100',
'--jq', r'.workflow_runs[] | select(.name == "CI") | "\(.id) \(.conclusion // "null")"'],
capture_output=True,
text=True,
timeout=30
)
if result.returncode != 0 or not result.stdout.strip():
return None # No CI run found (old commit?)
# Parse "run_id conclusion" format
line = result.stdout.strip().split('\n')[0]
parts = line.split(' ', 1)
run_id = parts[0]
conclusion = parts[1] if len(parts) > 1 else "null"
# Check if the desired artifact exists for this run
result = subprocess.run(
['gh', 'api', f'repos/{LEAN4_REPO}/actions/runs/{run_id}/artifacts',
'--jq', f'.artifacts[] | select(.name == "{artifact_name}") | .id'],
capture_output=True,
text=True,
timeout=30
)
if result.returncode != 0 or not result.stdout.strip():
# No artifact available
# If CI failed and no artifact, the build itself likely failed - skip
if conclusion == "failure":
return CI_FAILED
# Otherwise (in progress, expired, etc.) - fall back to local build
return None
# Download artifact
cache_path.mkdir(parents=True, exist_ok=True)
if not quiet:
print("downloading CI artifact... ", end='', flush=True)
result = subprocess.run(
['gh', 'run', 'download', run_id,
'-n', artifact_name,
'-R', LEAN4_REPO,
'-D', str(cache_path)],
capture_output=True,
text=True,
timeout=600 # 10 minutes for large downloads
)
if result.returncode != 0:
shutil.rmtree(cache_path, ignore_errors=True)
return None
# Extract tar.zst - find the file (name varies by platform/version)
tar_files = list(cache_path.glob('*.tar.zst'))
if not tar_files:
shutil.rmtree(cache_path, ignore_errors=True)
return None
tar_file = tar_files[0]
if not quiet:
print("extracting... ", end='', flush=True)
result = subprocess.run(
['tar', '--zstd', '-xf', tar_file.name],
cwd=cache_path,
capture_output=True,
timeout=300
)
if result.returncode != 0:
shutil.rmtree(cache_path, ignore_errors=True)
return None
# Move contents up from lean-VERSION-PLATFORM/ to cache_path/
# The extracted directory name varies (e.g., lean-4.15.0-linux, lean-4.15.0-darwin_aarch64)
extracted_dirs = [d for d in cache_path.iterdir() if d.is_dir() and d.name.startswith('lean-')]
if extracted_dirs:
extracted = extracted_dirs[0]
for item in extracted.iterdir():
dest = cache_path / item.name
if dest.exists():
if dest.is_dir():
shutil.rmtree(dest)
else:
dest.unlink()
shutil.move(str(item), str(cache_path / item.name))
extracted.rmdir()
# Clean up tar file
tar_file.unlink()
# Verify the extraction worked
if not (cache_path / 'bin' / 'lean').exists():
shutil.rmtree(cache_path, ignore_errors=True)
return None
return cache_path
except (subprocess.TimeoutExpired, FileNotFoundError):
shutil.rmtree(cache_path, ignore_errors=True)
return None
# -----------------------------------------------------------------------------
# Git helpers
# -----------------------------------------------------------------------------
def get_current_commit() -> str:
"""Get the current git HEAD commit SHA."""
try:
result = subprocess.run(
['git', 'rev-parse', 'HEAD'],
capture_output=True,
text=True,
timeout=5
)
if result.returncode == 0:
return result.stdout.strip()
error(f"Failed to get current commit: {result.stderr.strip()}")
except subprocess.TimeoutExpired:
error("Timeout getting current commit")
except FileNotFoundError:
error("git not found")
def resolve_sha(short_sha: str) -> str:
"""Resolve a (possibly short) SHA to full 40-character SHA using git rev-parse."""
if len(short_sha) == 40:
return short_sha
try:
result = subprocess.run(
['git', 'rev-parse', short_sha],
capture_output=True,
text=True,
timeout=5
)
if result.returncode == 0:
full_sha = result.stdout.strip()
if len(full_sha) == 40:
return full_sha
error(f"Cannot resolve SHA '{short_sha}': {result.stderr.strip() or 'not found in repository'}")
except subprocess.TimeoutExpired:
error(f"Timeout resolving SHA '{short_sha}'")
except FileNotFoundError:
error("git not found - required for SHA resolution")
# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------
def main():
parser = argparse.ArgumentParser(
description='Download pre-built CI artifacts for a Lean commit.',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
This script downloads pre-built binaries from GitHub Actions CI runs,
which is much faster than building from source (~30s vs 2-5min).
Artifacts are cached in ~/.cache/lean_build_artifact/ for reuse.
Examples:
build_artifact.py # Download for current HEAD
build_artifact.py --sha abc1234 # Download for specific commit
build_artifact.py --clear-cache # Clear cache to free disk space
"""
)
parser.add_argument('--sha', metavar='SHA',
help='Commit SHA to download artifact for (default: current HEAD)')
parser.add_argument('--clear-cache', action='store_true',
help='Clear artifact cache and exit')
parser.add_argument('--quiet', '-q', action='store_true',
help='Suppress progress messages (still prints result path)')
args = parser.parse_args()
# Handle cache clearing
if args.clear_cache:
if ARTIFACT_CACHE.exists():
size = sum(f.stat().st_size for f in ARTIFACT_CACHE.rglob('*') if f.is_file())
shutil.rmtree(ARTIFACT_CACHE)
info(f"Cleared cache at {ARTIFACT_CACHE} ({size / 1024 / 1024:.1f} MB)")
else:
info(f"Cache directory does not exist: {ARTIFACT_CACHE}")
return
# Get commit SHA
if args.sha:
sha = resolve_sha(args.sha)
else:
sha = get_current_commit()
if not args.quiet:
info(f"Commit: {sha[:12]}")
# Check prerequisites
if not check_gh_available():
error("gh CLI not available or not authenticated. Run 'gh auth login' first.")
if not check_zstd_support():
error("tar does not support zstd compression. Install zstd or a newer tar.")
artifact_name = get_artifact_name()
if artifact_name is None:
error(f"No CI artifacts available for this platform ({platform.system()} {platform.machine()})")
if not args.quiet:
info(f"Platform: {artifact_name}")
# Check cache
if is_cached(sha):
path = get_cache_path(sha)
if not args.quiet:
success("Using cached artifact")
print(path)
return
# Download artifact
result = download_ci_artifact(sha, quiet=args.quiet)
if result is CI_FAILED:
if not args.quiet:
print() # End the "downloading..." line
error(f"CI build failed for commit {sha[:12]}")
elif result is None:
if not args.quiet:
print() # End the "downloading..." line
error(f"No CI artifact available for commit {sha[:12]}")
else:
if not args.quiet:
print(color("done", Colors.GREEN))
print(result)
if __name__ == '__main__':
main()