mirror of https://github.com/leanprover/lean4.git
synced 2026-03-17 10:24:07 +00:00
This PR sets up the new integrated test/bench suite, then migrates all benchmarks and some related tests to it. It also adds some documentation and linting. For now, a lot of the old tests are left alone so that this PR doesn't grow even larger than it already is. Eventually, though, all tests should be migrated to the new suite so that there isn't a confusing mix of two systems.
211 lines
5.7 KiB
Python
Executable File
#!/usr/bin/env python3
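"""Measure the resource usage of a command using perf and rusage,
writing each measurement as one JSON object per line (JSON Lines)."""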

import argparse
import json
import os
import resource
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from pathlib import Path


@dataclass
class PerfMetric:
    event: str
    factor: float = 1
    unit: str | None = None


@dataclass
class RusageMetric:
    name: str
    factor: float = 1
    unit: str | None = None


@dataclass
class Result:
    category: str
    value: float
    unit: str | None
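
    # fmt() renders one measurement as a single JSON Lines record,
    # e.g. (illustrative values):
    #   {"metric": "compiler//wall-clock", "value": 1.23, "unit": "s"}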
    def fmt(self, topic: str) -> str:
        data = {"metric": f"{topic}//{self.category}", "value": self.value}
        if self.unit is not None:
            data["unit"] = self.unit
        return json.dumps(data)


PERF_METRICS = {
    "task-clock": PerfMetric("task-clock", factor=1e-9, unit="s"),
    "wall-clock": PerfMetric("duration_time", factor=1e-9, unit="s"),
    "instructions": PerfMetric("instructions"),
    "cycles": PerfMetric("cycles"),
}

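# perf reports some counters in scaled units (e.g. task-clock in msec,
# duration_time in ns); these factors normalize such values to seconds.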
PERF_UNITS = {
    "msec": 1e-3,
    "ns": 1e-9,
}

RUSAGE_METRICS = {
    "maxrss": RusageMetric("ru_maxrss", factor=1000, unit="B"),  # KiB on linux
}

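# The default set currently includes every known metric.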
ALL_METRICS = {**PERF_METRICS, **RUSAGE_METRICS}
DEFAULT_METRICS = set(ALL_METRICS.keys())


def resolve_metrics(metrics: set[str]) -> tuple[set[str], set[str]]:
    perf = set()
    rusage = set()
    unknown = set()

    for metric in metrics:
        if metric in PERF_METRICS:
            perf.add(metric)
        elif metric in RUSAGE_METRICS:
            rusage.add(metric)
        else:
            unknown.add(metric)

    if unknown:
        raise SystemExit(f"unknown metrics: {', '.join(unknown)}")

    return perf, rusage


def measure_perf(cmd: list[str], events: set[str]) -> dict[str, tuple[float, str]]:
    with tempfile.NamedTemporaryFile() as tmp:
        env = os.environ.copy()
        env["LC_ALL"] = "C"  # or perf may output syntactically invalid JSON

        # On NixOS, perf effectively prepends /usr/bin to the PATH, but in this
        # test suite, we often use the PATH to specify the binaries under test.
        # Hence, we reset the PATH inside of perf using env.
        cmd = [
            *("perf", "stat", "-j", "-o", tmp.name),
            *(arg for event in sorted(events) for arg in ["-e", event]),
            "--",
            *("env", f"PATH={env['PATH']}"),
            *cmd,
        ]
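
        # The assembled command looks roughly like (illustrative):
        #   perf stat -j -o /tmp/tmpXXXX -e cycles -e instructions -- env PATH=... <cmd>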

        # Execute command
        result = subprocess.run(cmd, env=env)
        if result.returncode != 0:
            sys.exit(result.returncode)

        # Collect results
        perf = {}
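        # perf writes one JSON object per line; counter lines look roughly
        # like (illustrative):
        #   {"counter-value": "421.5", "unit": "msec", "event": "task-clock", ...}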
        for line in tmp:
            data = json.loads(line)
            if "event" in data and "counter-value" in data:
                perf[data["event"]] = float(data["counter-value"]), data["unit"]

    return perf


def get_perf_result(perf: dict[str, tuple[float, str]], metric: str) -> Result:
    info = PERF_METRICS[metric]
    if info.event in perf:
        value, unit = perf[info.event]
    else:
        # Without the corresponding permissions,
        # we only get access to the userspace versions of the counters.
        value, unit = perf[f"{info.event}:u"]

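    # Convert using perf's reported unit when recognized ("msec", "ns"),
    # falling back to the metric's own factor otherwise.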
    value *= PERF_UNITS.get(unit, info.factor)
    return Result(category=metric, value=value, unit=info.unit)


def get_rusage_result(rusage: resource.struct_rusage, metric: str) -> Result:
    info = RUSAGE_METRICS[metric]
    value = getattr(rusage, info.name) * info.factor
    return Result(category=metric, value=value, unit=info.unit)


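# Example invocation (hypothetical script name; values below are illustrative):
#   ./measure.py -d -t compiler lean Stdlib.lean
# which writes records such as:
#   {"metric": "compiler//instructions", "value": 123456789.0}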
def main():
    parser = argparse.ArgumentParser(
        description="Measure resource usage of a command using perf and rusage.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "-t",
        "--topic",
        action="append",
        default=[],
        help="topic prefix for the metrics",
    )
    parser.add_argument(
        "-m",
        "--metric",
        action="append",
        default=[],
        help=f"metrics to measure. Can be specified multiple times. Available metrics: {', '.join(sorted(ALL_METRICS))}",
    )
    parser.add_argument(
        "-d",
        "--default-metrics",
        action="store_true",
        help=f"measure a default set of metrics: {', '.join(sorted(DEFAULT_METRICS))}",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=Path() / "measurements.jsonl",
        help="output file to write measurements to, in the JSON Lines format",
    )
    parser.add_argument(
        "-a",
        "--append",
        action="store_true",
        help="append to the output file instead of overwriting it",
    )
    parser.add_argument(
        "cmd",
        help="command to measure the resource usage of",
    )
    parser.add_argument(
        "args",
        nargs="*",
        default=[],
        help="arguments to pass to the command",
    )
    args = parser.parse_args()

    topics: list[str] = args.topic
    metrics: set[str] = set(args.metric)
    default_metrics: bool = args.default_metrics
    output: Path = args.output
    append: bool = args.append
    cmd: list[str] = [args.cmd] + args.args

    if default_metrics:
        metrics |= DEFAULT_METRICS

    perf_metrics, rusage_metrics = resolve_metrics(metrics)
    perf_events = {PERF_METRICS[metric].event for metric in perf_metrics}

    perf = measure_perf(cmd, perf_events)
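    # Resource usage of waited-for children: this covers the perf subprocess
    # and, transitively, the measured command.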
    rusage = resource.getrusage(resource.RUSAGE_CHILDREN)

    results = []
    for metric in perf_metrics:
        results.append(get_perf_result(perf, metric))
    for metric in rusage_metrics:
        results.append(get_rusage_result(rusage, metric))

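    # Emit every result once per requested topic prefix.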
    with open(output, "a" if append else "w") as f:
        for result in results:
            for topic in topics:
                f.write(f"{result.fmt(topic)}\n")


if __name__ == "__main__":
    main()