chore: switch to new test/bench suite (#12590)

This PR sets up the new integrated test/bench suite. It then migrates
all benchmarks and some related tests to the new suite. There's also
some documentation and some linting.

For now, a lot of the old tests are left alone so this PR doesn't become
even larger than it already is. Eventually, all tests should be migrated
to the new suite though so there isn't a confusing mix of two systems.
This commit is contained in:
Garmelon
2026-02-25 14:51:53 +01:00
committed by GitHub
parent bd0c6a42c8
commit 08eb78a5b2
4585 changed files with 17733 additions and 1765 deletions

View File

@@ -4,29 +4,25 @@ To build Lean you should use `make -j$(nproc) -C build/release`.
## Running Tests
See `doc/dev/testing.md` for full documentation. Quick reference:
See `tests/README.md` for full documentation. Quick reference:
```bash
# Full test suite (use after builds to verify correctness)
make -j$(nproc) -C build/release test ARGS="-j$(nproc)"
CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release -j "$(nproc)" test
# Specific test by name (supports regex via ctest -R)
make -j$(nproc) -C build/release test ARGS='-R grind_ematch --output-on-failure'
CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release -j "$(nproc)" test ARGS='-R grind_ematch'
# Rerun only previously failed tests
make -j$(nproc) -C build/release test ARGS='--rerun-failed --output-on-failure'
CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release -j "$(nproc)" test ARGS='--rerun-failed'
# Single test from tests/lean/run/ (quick check during development)
cd tests/lean/run && ./test_single.sh example_test.lean
# ctest directly (from stage1 build dir)
cd build/release/stage1 && ctest -j$(nproc) --output-on-failure --timeout 300
# Single test from tests/foo/bar/ (quick check during development)
cd tests/foo/bar && ./run_test example_test.lean
```
The full test suite includes `tests/lean/`, `tests/lean/run/`, `tests/lean/interactive/`,
`tests/compiler/`, `tests/pkg/`, Lake tests, and more. Using `make test` or `ctest` runs
all of them; `test_single.sh` in `tests/lean/run/` only covers that one directory.
## New features
When asked to implement new features:
@@ -34,8 +30,6 @@ When asked to implement new features:
* write comprehensive tests first (expecting that these will initially fail)
* and then iterate on the implementation until the tests pass.
All new tests should go in `tests/lean/run/`. These tests don't have expected output; we just check there are no errors. You should use `#guard_msgs` to check for specific messages.
## Success Criteria
*Never* report success on a task unless you have verified both a clean build without errors, and that the relevant tests pass.

View File

@@ -85,7 +85,7 @@ jobs:
- name: CI Merge Checkout
run: |
git fetch --depth=1 origin ${{ github.sha }}
git checkout FETCH_HEAD flake.nix flake.lock script/prepare-* tests/lean/run/importStructure.lean
git checkout FETCH_HEAD flake.nix flake.lock script/prepare-* tests/elab/importStructure.lean
if: github.event_name == 'pull_request'
# (needs to be after "Checkout" so files don't get overridden)
- name: Setup emsdk
@@ -235,7 +235,7 @@ jobs:
# prefix `if` above with `always` so it's run even if tests failed
if: always() && steps.test.conclusion != 'skipped'
- name: Check Test Binary
run: ${{ matrix.binary-check }} tests/compiler/534.lean.out
run: ${{ matrix.binary-check }} tests/compile/534.lean.out
if: (!matrix.cross) && steps.test.conclusion != 'skipped'
- name: Build Stage 2
run: |
@@ -246,13 +246,7 @@ jobs:
make -C build -j$NPROC check-stage3
if: matrix.check-stage3
- name: Test Speedcenter Benchmarks
run: |
# Necessary for some timing metrics but does not work on Namespace runners
# and we just want to test that the benchmarks run at all here
#echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid
export BUILD=$PWD/build PATH=$PWD/build/stage1/bin:$PATH
cd tests/bench
nix shell .#temci -c temci exec --config speedcenter.yaml --included_blocks fast --runs 1
run: nix shell github:Kha/lakeprof -c make -C build -j$NPROC bench
if: matrix.test-speedcenter
- name: Check rebootstrap
run: |

View File

@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.11)
cmake_minimum_required(VERSION 3.21)
option(USE_MIMALLOC "use mimalloc" ON)
@@ -147,6 +147,7 @@ ExternalProject_Add(
INSTALL_COMMAND ""
DEPENDS stage2
EXCLUDE_FROM_ALL ON
STEP_TARGETS configure
)
# targets forwarded to appropriate stages
@@ -157,6 +158,25 @@ add_custom_target(update-stage0-commit COMMAND $(MAKE) -C stage1 update-stage0-c
add_custom_target(test COMMAND $(MAKE) -C stage1 test DEPENDS stage1)
add_custom_target(
bench
COMMAND $(MAKE) -C stage2
COMMAND $(MAKE) -C stage2 -j1 bench
DEPENDS stage2
)
add_custom_target(
bench-part1
COMMAND $(MAKE) -C stage2
COMMAND $(MAKE) -C stage2 -j1 bench-part1
DEPENDS stage2
)
add_custom_target(
bench-part2
COMMAND $(MAKE) -C stage2
COMMAND $(MAKE) -C stage2 -j1 bench-part2
DEPENDS stage2
)
add_custom_target(clean-stdlib COMMAND $(MAKE) -C stage1 clean-stdlib DEPENDS stage1)
install(CODE "execute_process(COMMAND make -C stage1 install)")

View File

@@ -1,5 +1,9 @@
# Test Suite
**Warning:** This document is partially outdated.
It describes the old test suite, which is currently in the process of being replaced.
The new test suite's documentation can be found at [`tests/README.md`](../../tests/README.md).
After [building Lean](../make/index.md) you can run all the tests using
```
cd build/release

View File

@@ -83,7 +83,7 @@ def main (args : List String) : IO Unit := do
lastRSS? := some rss
let avgRSSDelta := totalRSSDelta / (n - 2)
IO.println s!"avg-reelab-rss-delta: {avgRSSDelta}"
IO.println s!"measurement: avg-reelab-rss-delta {avgRSSDelta*1024} b"
let _ ← Ipc.collectDiagnostics requestNo uri versionNo
(← Ipc.stdin).writeLspMessage (Message.notification "exit" none)

View File

@@ -82,7 +82,7 @@ def main (args : List String) : IO Unit := do
lastRSS? := some rss
let avgRSSDelta := totalRSSDelta / (n - 2)
IO.println s!"avg-reelab-rss-delta: {avgRSSDelta}"
IO.println s!"measurement: avg-reelab-rss-delta {avgRSSDelta*1024} b"
let _ ← Ipc.collectDiagnostics requestNo uri versionNo
Ipc.shutdown requestNo

View File

@@ -9,5 +9,5 @@ find -regex '.*/CMakeLists\.txt\(\.in\)?\|.*\.cmake\(\.in\)?' \
! -path "./stage0/*" \
-exec \
uvx gersemi --in-place --line-length 120 --indent 2 \
--definitions src/cmake/Modules/ src/CMakeLists.txt \
--definitions src/cmake/Modules/ src/CMakeLists.txt tests/CMakeLists.txt \
-- {} +

View File

@@ -1,6 +1,4 @@
cmake_minimum_required(VERSION 3.10)
cmake_policy(SET CMP0054 NEW)
cmake_policy(SET CMP0110 NEW)
cmake_minimum_required(VERSION 3.21)
if(NOT CMAKE_GENERATOR MATCHES "Unix Makefiles")
message(FATAL_ERROR "The only supported CMake generator at the moment is 'Unix Makefiles'")
endif()

View File

@@ -38,7 +38,7 @@ def coercionsBannedInCore : Array Name := #[``optionCoe, ``instCoeSubarrayArray]
def coeLinter : Linter where
run := fun _ => do
let mainModule ← getMainModule
let isCoreModule := mainModule = `lean.run.linterCoe (mainModule.getRoot [`Init, `Std])
let isCoreModule := mainModule = `elab.linterCoe (mainModule.getRoot [`Init, `Std])
let shouldWarnOnDeprecated := getLinterValue linter.deprecatedCoercions (← getLinterOptions)
let trees ← Elab.getInfoTrees
for tree in trees do

16
tests/.gitignore vendored
View File

@@ -1 +1,15 @@
*.olean
# Generated by cmake
/env_test.sh
/env_bench.sh
# Created by test suite
*.out.produced
*.exit.produced
# Created by bench suite
*.measurements.jsonl
measurements.jsonl
# Created by compile tests
*.lean.c
*.lean.out

View File

@@ -1,27 +1,182 @@
#################
## Environment ##
#################
# MSYS2 bash usually handles Windows paths relatively well, but not when putting them in the PATH
string(REGEX REPLACE "^([a-zA-Z]):" "/\\1" LEAN_BIN "${CMAKE_BINARY_DIR}/bin")
# Environment variables
set(TEST_VARS "${LEAN_TEST_VARS}")
string(APPEND TEST_VARS " PATH=${LEAN_BIN}:$PATH")
# Test scripts can use these to find other parts of the repo, e.g. "$TEST_DIR/measure.py"
string(APPEND TEST_VARS " STAGE='${STAGE}'") # Using this should not normally be necessary
string(APPEND TEST_VARS " SRC_DIR='${CMAKE_SOURCE_DIR}'")
string(APPEND TEST_VARS " TEST_DIR='${CMAKE_CURRENT_SOURCE_DIR}'")
string(APPEND TEST_VARS " BUILD_DIR='${CMAKE_BINARY_DIR}'")
string(APPEND TEST_VARS " SCRIPT_DIR='${CMAKE_SOURCE_DIR}/../script'")
# Use the current stage's lean binary instead of whatever lake thinks we want
string(APPEND TEST_VARS " PATH='${LEAN_BIN}':\"$PATH\"")
string(APPEND TEST_VARS " LEANC_OPTS='${LEANC_OPTS}'")
# LEANC_OPTS in CXX is necessary for macOS c++ to find its headers
string(APPEND TEST_VARS " CXX='${CMAKE_CXX_COMPILER} ${LEANC_OPTS}'")
add_test(lean_help1 "${CMAKE_BINARY_DIR}/bin/lean" --help)
add_test(lean_help2 "${CMAKE_BINARY_DIR}/bin/lean" -h)
add_test(lean_version1 "${CMAKE_BINARY_DIR}/bin/lean" --version)
add_test(lean_version2 "${CMAKE_BINARY_DIR}/bin/lean" --v)
add_test(lean_ghash1 "${CMAKE_BINARY_DIR}/bin/lean" -g)
add_test(lean_ghash2 "${CMAKE_BINARY_DIR}/bin/lean" --githash)
add_test(lean_unknown_option bash "${LEAN_SOURCE_DIR}/cmake/check_failure.sh" "${CMAKE_BINARY_DIR}/bin/lean" "-z")
add_test(
lean_unknown_file1
bash
"${LEAN_SOURCE_DIR}/cmake/check_failure.sh"
"${CMAKE_BINARY_DIR}/bin/lean"
"boofoo.lean"
)
string(APPEND TEST_VARS " TEST_BENCH=")
configure_file(env.sh.in "${CMAKE_CURRENT_SOURCE_DIR}/env_test.sh")
block()
string(APPEND TEST_VARS " TEST_BENCH=1")
configure_file(env.sh.in "${CMAKE_CURRENT_SOURCE_DIR}/env_bench.sh")
endblock()
######################
## Helper functions ##
######################
# Look up the run_test / run_bench scripts in DIR_ABS and report which exist.
# On success, sets RUN_TEST, RUN_BENCH, RUN_TEST_EXISTS and RUN_BENCH_EXISTS
# in the caller's scope; fails fatally if neither script is present.
function(check_test_bench_scripts DIR DIR_ABS)
  set(RUN_TEST "${DIR_ABS}/run_test")
  set(RUN_BENCH "${DIR_ABS}/run_bench")
  set(RUN_TEST_EXISTS FALSE)
  set(RUN_BENCH_EXISTS FALSE)
  if(EXISTS "${RUN_TEST}")
    set(RUN_TEST_EXISTS TRUE)
  endif()
  if(EXISTS "${RUN_BENCH}")
    set(RUN_BENCH_EXISTS TRUE)
  endif()
  if(NOT RUN_TEST_EXISTS AND NOT RUN_BENCH_EXISTS)
    message(FATAL_ERROR "${DIR}: Found neither a run_test nor a run_bench file")
    # Unreachable: FATAL_ERROR stops processing; kept as a defensive guard.
    return()
  endif()
  # Replace with return(PROPAGATE) if we ever update to cmake 3.25+
  set(RUN_TEST "${RUN_TEST}" PARENT_SCOPE)
  set(RUN_BENCH "${RUN_BENCH}" PARENT_SCOPE)
  set(RUN_TEST_EXISTS "${RUN_TEST_EXISTS}" PARENT_SCOPE)
  set(RUN_BENCH_EXISTS "${RUN_BENCH_EXISTS}" PARENT_SCOPE)
endfunction()
# Verify that the BENCH argument and the presence of a run_bench script agree:
# a pile/directory with a run_bench script must declare BENCH, and vice versa.
function(check_bench_argument DIR ARGS_BENCH RUN_BENCH_EXISTS)
  if(RUN_BENCH_EXISTS)
    if(NOT ARGS_BENCH)
      message(FATAL_ERROR "${DIR}: run_bench file found, BENCH argument must be specified")
    endif()
  elseif(ARGS_BENCH)
    message(FATAL_ERROR "${DIR}: BENCH argument specified but no run_bench file found")
  endif()
endfunction()
# Register a custom command that merges the measurement files given in ARGN
# into a single OUTPUT file via combine.py. With no inputs, warn and create an
# empty OUTPUT so targets depending on it still build.
function(add_combined_measurements OUTPUT)
  if(NOT ARGN)
    message(AUTHOR_WARNING "No input measurements provided for ${OUTPUT}")
    # Touch an empty file so dependents of OUTPUT don't fail.
    add_custom_command(OUTPUT "${OUTPUT}" COMMAND "${CMAKE_COMMAND}" -E touch "${OUTPUT}")
    return()
  endif()
  add_custom_command(
    OUTPUT "${OUTPUT}"
    DEPENDS "${ARGN}"
    # ${ARGN} is deliberately unquoted so the list expands to one path per argument.
    COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/combine.py" -o "${OUTPUT}" -- ${ARGN}
  )
endfunction()
# A test pile is a directory containing many test files, each of which
# represents a separate test (or benchmark). The directory may also contain
# additional files or subdirectories required by the individual test files.
#
# If a run_test script is present, each test file will be added as a test. Tests
# can be disabled on a per-file basis by creating a `<file>.no_test` file.
#
# If a run_bench script is present, each test file will be added as a benchmark.
# Benchmarks can be disabled on a per-file basis by creating a `<file>.no_bench`
# file. CMake expects the bench script to produce a `<file>.measurements.jsonl`
# file next to the test file. The individual measurements will be combined into
# a single `measurements.jsonl` file in the pile directory, whose path will be
# added to the list specified by the BENCH argument.
function(add_test_pile DIR GLOB)
  # BENCH (one-value keyword) names the caller's list variable that receives
  # the pile's combined measurements file.
  cmake_parse_arguments(ARGS "" BENCH "" ${ARGN})
  set(DIR_ABS "${CMAKE_CURRENT_SOURCE_DIR}/${DIR}")
  # Defines RUN_TEST/RUN_BENCH and RUN_TEST_EXISTS/RUN_BENCH_EXISTS in this scope.
  check_test_bench_scripts("${DIR}" "${DIR_ABS}")
  check_bench_argument("${DIR}" "${ARGS_BENCH}" "${RUN_BENCH_EXISTS}")
  # The test files' individual measurement files that will later be combined
  # into a single measurements.jsonl file
  set(MEASUREMENTS_FILES "")
  # Iterate over all files matching the glob
  file(GLOB TEST_FILES "${DIR_ABS}/${GLOB}")
  foreach(FILE_ABS IN LISTS TEST_FILES)
    # Path relative to source directory
    cmake_path(RELATIVE_PATH FILE_ABS BASE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE FILE)
    # Path relative to pile directory
    cmake_path(RELATIVE_PATH FILE_ABS BASE_DIRECTORY "${DIR_ABS}" OUTPUT_VARIABLE FILE_NAME)
    if(RUN_TEST_EXISTS AND NOT EXISTS "${FILE_ABS}.no_test")
      add_test(NAME "${FILE}" WORKING_DIRECTORY "${DIR_ABS}" COMMAND "${RUN_TEST}" "${FILE_NAME}")
    endif()
    if(RUN_BENCH_EXISTS AND NOT EXISTS "${FILE_ABS}.no_bench")
      set(MEASUREMENTS_FILE "${FILE_ABS}.measurements.jsonl")
      list(APPEND MEASUREMENTS_FILES "${MEASUREMENTS_FILE}")
      # Remove stale output first so a failed bench run can't leave old data behind.
      add_custom_command(
        OUTPUT "${MEASUREMENTS_FILE}"
        WORKING_DIRECTORY "${DIR_ABS}"
        COMMAND "${CMAKE_COMMAND}" -E remove -f "${MEASUREMENTS_FILE}"
        COMMAND "${RUN_BENCH}" "${FILE_NAME}"
      )
    endif()
  endforeach()
  # Combine measurements
  if(RUN_BENCH_EXISTS)
    set(MEASUREMENTS_FILE "${DIR_ABS}/measurements.jsonl")
    # Append our combined file to the caller's BENCH list and propagate it up.
    list(APPEND "${ARGS_BENCH}" "${MEASUREMENTS_FILE}")
    set("${ARGS_BENCH}" "${${ARGS_BENCH}}" PARENT_SCOPE)
    add_combined_measurements("${MEASUREMENTS_FILE}" "${MEASUREMENTS_FILES}")
  endif()
endfunction()
# A test directory is a directory containing a single test (or benchmark),
# alongside any additional files or subdirectories required by that test.
function(add_test_dir DIR)
  # BENCH (one-value keyword) names the caller's list variable that receives
  # the directory's measurements file.
  cmake_parse_arguments(ARGS "" BENCH "" ${ARGN})
  set(DIR_ABS "${CMAKE_CURRENT_SOURCE_DIR}/${DIR}")
  # Defines RUN_TEST/RUN_BENCH and RUN_TEST_EXISTS/RUN_BENCH_EXISTS in this scope.
  check_test_bench_scripts("${DIR}" "${DIR_ABS}")
  check_bench_argument("${DIR}" "${ARGS_BENCH}" "${RUN_BENCH_EXISTS}")
  # Add as test
  if(RUN_TEST_EXISTS)
    add_test(NAME "${DIR}" WORKING_DIRECTORY "${DIR_ABS}" COMMAND "${RUN_TEST}")
  endif()
  # Add as benchmark
  if(RUN_BENCH_EXISTS)
    set(MEASUREMENTS_FILE "${DIR_ABS}/measurements.jsonl")
    # Append our measurements file to the caller's BENCH list and propagate it up.
    list(APPEND "${ARGS_BENCH}" "${MEASUREMENTS_FILE}")
    set("${ARGS_BENCH}" "${${ARGS_BENCH}}" PARENT_SCOPE)
    # Remove stale output first so a failed bench run can't leave old data behind.
    add_custom_command(
      OUTPUT "${MEASUREMENTS_FILE}"
      WORKING_DIRECTORY "${DIR_ABS}"
      COMMAND "${CMAKE_COMMAND}" -E remove -f "${MEASUREMENTS_FILE}"
      COMMAND "${RUN_BENCH}"
    )
  endif()
endfunction()
# Benchmarks are split into two parts which should be roughly equal in total runtime.
# In radar, each part is run on a different runner.
set(PART1 "")
set(PART2 "")
##########################
## Tests and benchmarks ##
##########################
# LEAN TESTS
file(GLOB LEANTESTS "${LEAN_SOURCE_DIR}/../tests/lean/*.lean")
@@ -221,3 +376,31 @@ foreach(T ${LEANLAKETESTS})
)
endif()
endforeach(T)
add_test_pile(compile *.lean BENCH PART2)
add_test_pile(compile_bench *.lean BENCH PART2)
add_test_pile(elab *.lean)
add_test_pile(elab_bench *.lean BENCH PART2)
add_test_pile(elab_fail *.lean)
add_test_pile(misc *.sh)
add_test_pile(misc_bench *.sh BENCH PART2)
add_test_dir(bench/build BENCH PART1)
add_test_dir(bench/size BENCH PART1)
add_test_dir(lake_bench/inundation BENCH PART2)
#######################
## Benchmark targets ##
#######################
set(BENCH_MEASUREMENTS_PART1 "${CMAKE_CURRENT_SOURCE_DIR}/part1.measurements.jsonl")
set(BENCH_MEASUREMENTS_PART2 "${CMAKE_CURRENT_SOURCE_DIR}/part2.measurements.jsonl")
set(BENCH_MEASUREMENTS "${CMAKE_CURRENT_SOURCE_DIR}/measurements.jsonl")
add_combined_measurements("${BENCH_MEASUREMENTS_PART1}" "${PART1}")
add_combined_measurements("${BENCH_MEASUREMENTS_PART2}" "${PART2}")
add_combined_measurements("${BENCH_MEASUREMENTS}" "${BENCH_MEASUREMENTS_PART1}" "${BENCH_MEASUREMENTS_PART2}")
add_custom_target(bench-part1 DEPENDS lean "${BENCH_MEASUREMENTS_PART1}" COMMENT "Run benchmarks (part 1)")
add_custom_target(bench-part2 DEPENDS lean "${BENCH_MEASUREMENTS_PART2}" COMMENT "Run benchmarks (part 2)")
add_custom_target(bench DEPENDS lean "${BENCH_MEASUREMENTS}" COMMENT "Run all benchmarks")

251
tests/README.md Normal file
View File

@@ -0,0 +1,251 @@
# Test suite
This directory contains the lean test and benchmark suite.
It is currently in the process of being migrated to the framework described in this file.
Some tests still use the previous framework,
which is partially documented in [testing.md](../doc/dev/testing.md).
The test suite consists of two types of directories: Test directories and test piles.
A **test directory** is a directory containing a `run_test` and/or a `run_bench` script.
It represents a single test or benchmark, depending on which script is present.
The run scripts are executed once with their working directory set to the test directory.
A **test pile** is also a directory containing a `run_test` and/or a `run_bench` script.
Here however, each file of a directory-specific extension (usually `.lean`) represents a single test or benchmark.
The run scripts are executed once for each test file with their working directory set to the pile directory.
Often, additional supplementary files are placed next to the test files and interpreted by the run scripts.
## Directory structure
Benchmarks belonging to the old framework are not included in this description.
- `bench`:
A bunch of benchmarks and benchmarking related files,
most of which are not part of the test suite.
- `build`:
A benchmark that builds the lean stdlib and measures the per-file performance.
- `size`:
A benchmark that measures the sizes of a few different kinds of files.
- `compile`:
Tests that compile lean files and then execute the resulting binary, verifying the resulting output.
They also run the same lean file through the interpreter.
- `compile_bench`:
Benchmarks that compile lean files and measure the execution of the resulting binary,
as well as optionally run the same lean file through the interpreter.
- `elab`:
Tests that elaborate lean files without executing them, verifying the resulting output.
- `elab_fail`:
Like `elab`, but expecting an exit code of 1 instead of 0.
- `elab_bench`:
Like `elab`, but measuring the elaboration performance.
- `lake_bench`:
Benchmark directories that measure lake performance.
- `misc`:
A collection of miscellaneous small test scripts.
- `misc_bench`:
A collection of miscellaneous small benchmark scripts.
## How to run the test suite?
Run all tests using
```sh
CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release -j "$(nproc)" test
```
Or rerun only the failed tests using
```sh
CTEST_PARALLEL_LEVEL="$(nproc)" CTEST_OUTPUT_ON_FAILURE=1 \
make -C build/release -j "$(nproc)" test ARGS="--rerun-failed"
```
Run an individual test by `cd`-ing into its directory and then using
```sh
./run_test # in a test directory
./run_test testfile # in a test pile
```
## How to run the bench suite?
Run the full benchmark suite using
```sh
make -C build/release -j "$(nproc)" bench # produces tests/measurements.jsonl
```
It is split into two roughly equal parts so it can be split among the benchmark runner machines.
Run each individual part using
```sh
make -C build/release -j "$(nproc)" bench-part1 # produces tests/part1.measurements.jsonl
make -C build/release -j "$(nproc)" bench-part2 # produces tests/part2.measurements.jsonl
```
Make sure not to specify `-j "$(nproc)"` when running the bench suite manually inside `build/release/stage<n>`.
Run an individual benchmark by `cd`-ing into its directory and then using
```sh
./run_bench # in a test directory
./run_bench testfile # in a test pile
```
## How to write a test or benchmark?
If your test fits one of the existing test piles:
1. Add your test file to the test pile.
2. Document the test via doc comment inside the test file.
3. Execute the test as documented above (or run the entire test suite).
4. Run [`fix_expected.py`](fix_expected.py) to create an `.out.expected` or `.out.ignored` file for the test.
5. Run [`lint.py`](lint.py).
If your test should be part of one of the existing test directories:
1. Modify the test directory to include your test.
2. Document the test via comment or `README.md`, following the test directory's conventions.
Otherwise, create a new test directory or pile:
1. Decide on a place to put the new directory.
2. Write a `run_test` and/or `run_bench` script.
3. Add the directory to the [`CMakeLists.txt`](CMakeLists.txt) file,
next to the other tests near the bottom.
4. Document the new directory in this readme file
by updating the directory structure section above.
5. Optionally update [`lint.py`](lint.py) if it makes sense.
## How to fix existing tests after your change breaks them?
If the tests break because the expected output differs from the actual output,
don't blindly copy the produced output into the expected output file.
Instead, execute [`fix_expected.py`](fix_expected.py) (you need to have `meld` installed).
This script allows you to review the changes one-by-one.
If the test output is very brittle, either modify the test so the output becomes less brittle,
or ignore the output by removing `.out.expected`,
re-running `fix_expected.py` and choosing to ignore the output.
Brittle output that should usually be ignored are detailed compiler debug traces
or inherently nondeterministic things like multithreading.
Some test directories or test piles strip or modify certain flaky or nondeterministic outputs
(e.g. benchmark timings, reference manual URLs).
## How to write a test or bench run script?
Test and bench scripts must be named `run_test` and `run_bench` respectively.
They must be executable and start with the shebang `#!/usr/bin/env bash`.
Immediately afterwards, they must source `env_test.sh` or `env_bench.sh` respectively
using a relative path.
The `env_*.sh` files set some build related environment variables,
plus a set of test suite related environment variables
documented at the top of [`CMakeLists.txt`](CMakeLists.txt).
The most notable ones are:
- `TEST_DIR`: Absolute path to the `tests` directory.
- `SCRIPT_DIR`: Absolute path to the `script` directory.
- `TEST_BENCH`: Set to `1` if we're currently executing a benchmark, unset otherwise.
Finally, the run script should source `"$TEST_DIR/util.sh"`,
which provides a few utility functions and also uses `set` to set sensible bash defaults.
See `util.sh` for the available utility functions.
The run scripts are always executed with their working directory set to their surrounding directory.
Inside a test pile, `run_test` and `run_bench` receive
a relative path to the file under test as their first (and only) argument.
Inside a test directory, they receive no arguments.
A test succeeds iff the `run_test` script exits with exit code 0.
A benchmark additionally must produce a measurements file:
Inside a test pile, `run_bench testfile` is expected to produce a `testfile.measurements.jsonl` file.
Inside a test directory, `run_bench` is expected to produce a `measurements.jsonl` file.
## The `elab*` test pile
These files are available to configure a test:
- `<file>.init.sh`:
This file is sourced at the start of the run script.
Configure the run script by setting bash variables here.
- `<file>.before.sh`:
This file is executed before the test/benchmark.
Create or set up temporary resources used by the test here.
Usually, it is better to create temporary files or directories inside the test itself,
so they're also available when opening the file in your editor.
- `<file>.after.sh`:
This file is executed after the test/benchmark.
Delete temporary resources used by the test here.
- `<file>.out.expected`:
The test fails if its stdout and stderr do not match this file's contents.
If this file isn't present, the test's output must be empty.
- `<file>.out.ignored`:
Ignore the test's output entirely; don't compare it to `<file>.out.expected`.
- `<file>.exit.expected`:
The test fails if its exit code doesn't match this file's contents.
If this file isn't present, the pile's default exit code is used instead.
These bash variables (set via `<file>.init.sh`) are used by the run script:
- `TEST_LEAN_ARGS`:
A bash array of additional arguments to the `lean` command.
## The `compile*` test pile
These files are available to configure a test:
- `<file>.(do|no)_(compile|interpret)`,
`<file>.(do|no)_(compile|interpret)_(test|bench)`:
Enable or disable the compiler or interpreter during testing or benchmarking.
The more specific files take precedence over the more generic files.
Instead of disabling the compiler during tests, consider reducing the problem size
by passing different command line parameters via `<file>.init.sh`.
- `<file>.init.sh`:
This file is sourced at the start of the run script.
Configure the run script by setting bash variables here.
- `<file>.before.sh`:
This file is executed before the test/benchmark.
Create or set up temporary resources used by the test here.
Usually, it is better to create temporary files or directories inside the test itself,
so they're also available when opening the file in your editor.
- `<file>.after.sh`:
This file is executed after the test/benchmark.
Delete temporary resources used by the test here.
- `<file>.out.expected`:
The test fails if its stdout and stderr do not match this file's contents.
If this file isn't present, the test's output must be empty.
- `<file>.out.ignored`:
Ignore the test's output entirely; don't compare it to `<file>.out.expected`.
- `<file>.exit.expected`:
The test fails if its exit code doesn't match this file's contents.
If this file isn't present, the test's exit code must be 0.
These bash variables (set via `<file>.init.sh`) are used by the run script:
- `TEST_LEAN_ARGS`:
A bash array of additional arguments to the `lean` command used to compile the lean file.
- `TEST_LEANC_ARGS`:
A bash array of additional arguments to the `leanc` command used to compile the c file.
- `TEST_LEANI_ARGS`:
A bash array of additional arguments to the `lean --run <file>` command used to interpret the lean file.
- `TEST_ARGS`:
A bash array of arguments to the compiled (or interpreted) program.
Check `TEST_BENCH` if you want to specify more intense parameters for benchmarks.

View File

@@ -1,24 +0,0 @@
# Lean 4 benchmark suite
This directory contains the new Lean 4 benchmark suite.
It is built around [radar](https://github.com/leanprover/radar)
and benchmark results can be viewed
on the [Lean FRO radar instance](https://radar.lean-lang.org/repos/lean4).
Benchmarks are organized into subdirectories.
Each benchmark directory must contain a script called `run` that executes the benchmark,
as well as any additional benchmark-specific required files.
Ideally, each benchmark directory also contains a `README.md` explaining the benchmark.
To execute the entire suite, run `tests/bench-radar/run` in the repo root.
To execute an individual benchmark, run `tests/bench-radar/<benchmark>/run` in the repo root.
All scripts output their measurements into the file `measurements.jsonl`.
Radar sums any duplicated measurements with matching metrics.
To post-process the `measurements.jsonl` file this way in-place,
run `tests/bench-radar/combine.py` in the repo root after executing the benchmark suite.
All scripts related to the new benchmark suite are contained in this directory.
The files at `tests/bench` belong to the old suite.
The `*.py` symlinks are only for convenience when editing the python scripts in VSCode,
so the python extensions (in particular pyrefly) treat them as python files.

View File

@@ -1,44 +0,0 @@
#!/usr/bin/env python3
import json
import subprocess
import sys
from pathlib import Path
def run(*args: str) -> None:
    """Execute a command, raising CalledProcessError if it exits non-zero."""
    subprocess.run([*args], check=True)
def run_stdout(*command: str, cwd: str | None = None) -> str:
result = subprocess.run(command, capture_output=True, encoding="utf-8", cwd=cwd)
if result.returncode != 0:
print(result.stdout, end="", file=sys.stdout)
print(result.stderr, end="", file=sys.stderr)
sys.exit(result.returncode)
return result.stdout
def main() -> None:
    # Publish a lakeprof HTML report for the current commit to speed.lean-lang.org.
    script_file = Path(__file__)
    template_file = script_file.parent / "lakeprof_report_template.html"
    # "@" is git shorthand for HEAD; the commit sha keys the upload location.
    sha = run_stdout("git", "rev-parse", "@").strip()
    base_url = f"https://speed.lean-lang.org/lean4-out/{sha}"
    # Generate the report from lakeprof data recorded under src/.
    report = run_stdout("lakeprof", "report", "-prc", cwd="src")
    with open(template_file) as f:
        template = f.read()
    # Fill in the template placeholders; base_url is JSON-encoded so it can be
    # embedded as a string literal in the template's JavaScript.
    template = template.replace("__BASE_URL__", json.dumps(base_url))
    template = template.replace("__LAKEPROF_REPORT__", report)
    with open("index.html", "w") as f:
        f.write(template)
    # Upload the report and the raw lakeprof artifacts via HTTP PUT (curl -T).
    run("curl", "-fT", "index.html", f"{base_url}/index.html")
    run("curl", "-fT", "src/lakeprof.log", f"{base_url}/lakeprof.log")
    run("curl", "-fT", "src/lakeprof.trace_event", f"{base_url}/lakeprof.trace_event")


if __name__ == "__main__":
    main()

View File

@@ -1,44 +0,0 @@
#!/usr/bin/env bash
# Radar benchmark driver: builds the compiler stages, wraps the stage2 lean
# binary, and measures a stage3 build (plus lakeprof path analysis).
set -euxo pipefail

BENCH="tests/bench-radar"
STAGE2="build/release/stage2"
STAGE3="build/release/stage3"

# Build previous stages and warm up stage3
cmake --preset release
timeout -s KILL 1h time make -C build/release -j"$(nproc)" stage3

# Prepare a stage3 install tree with .olean files removed.
# NOTE(review): presumably consumed by a later size measurement — confirm.
pushd "$STAGE3"
mkdir install
make install DESTDIR=install
find lib -name "*.olean" -delete
popd

# Use stage2 binaries from now on
#
# Otherwise, tools like lakeprof use the global lean installation,
# which may not exist or be the right version.
export PATH="$PWD/$STAGE2/bin:$PATH"

# Substitute our own wrapper script
mv "$STAGE2/bin/lean" "$STAGE2/bin/lean.wrapped"
cp "$BENCH/build/lean_wrapper.py" "$STAGE2/bin/lean"

# Build stage3
"$BENCH/measure.py" -t build \
  -m cycles -m instructions -m maxrss -m task-clock -m wall-clock -- \
  lakeprof record -- \
  make -C build/release -j"$(nproc)" stage3

# Analyze lakeprof data
mv lakeprof.log src
pushd src
# jq extracts the final (longest) path's wall-clock time from lakeprof's JSON report.
lakeprof report -pj | jq '{metric: "build/lakeprof/longest build path//wall-clock", value: .[-1][2], unit: "s"}' -c >> ../measurements.jsonl
lakeprof report -rj | jq '{metric: "build/lakeprof/longest rebuild path//wall-clock", value: .[-1][2], unit: "s"}' -c >> ../measurements.jsonl
popd

# Upload lakeprof report
# Guarded to prevent accidental uploads (which wouldn't work anyways) during local runs.
if [ -f build_upload_lakeprof_report ]; then
  python3 "$BENCH/build/lakeprof_report_upload.py"
fi

View File

@@ -1,31 +0,0 @@
#!/usr/bin/env python3
import argparse
import json
from pathlib import Path

# The measurements file produced by the benchmark suite, in the current directory.
OUTFILE = Path() / "measurements.jsonl"

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=f"Combine duplicated measurements in {OUTFILE.name} the way radar does, by summing their values."
    )
    args = parser.parse_args()
    # Summed value per metric; dict insertion order preserves first appearance.
    values: dict[str, float] = {}
    # Unit per metric; the last seen unit wins.
    # NOTE(review): assumes duplicated metrics share a unit — confirm.
    units: dict[str, str | None] = {}
    with open(OUTFILE, "r") as f:
        for line in f:
            data = json.loads(line)
            metric = data["metric"]
            values[metric] = values.get(metric, 0) + data["value"]
            units[metric] = data.get("unit")
    # Rewrite the file in place with one combined entry per metric.
    with open(OUTFILE, "w") as f:
        for metric, value in values.items():
            unit = units.get(metric)
            data = {"metric": metric, "value": value}
            if unit is not None:
                data["unit"] = unit
            f.write(f"{json.dumps(data)}\n")

View File

@@ -1,166 +0,0 @@
#!/usr/bin/env python3
import argparse
import json
import os
import resource
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from pathlib import Path
@dataclass
class PerfMetric:
    """A metric read from a `perf stat` counter."""

    # perf event name passed via `-e`
    event: str
    # fallback multiplier for the raw counter value (used unless perf
    # reports a unit listed in PERF_UNITS)
    factor: float = 1
    # unit emitted into the measurements output, if any
    unit: str | None = None


@dataclass
class RusageMetric:
    """A metric read from resource.getrusage of the child processes."""

    # attribute name on the rusage result, e.g. "ru_maxrss"
    name: str
    # multiplier applied to the raw rusage value
    factor: float = 1
    # unit emitted into the measurements output, if any
    unit: str | None = None


# Metrics measured via `perf stat`.
PERF_METRICS = {
    "task-clock": PerfMetric("task-clock", factor=1e-9, unit="s"),
    "wall-clock": PerfMetric("duration_time", factor=1e-9, unit="s"),
    "instructions": PerfMetric("instructions"),
    "cycles": PerfMetric("cycles"),
}
# Scale factors for units perf may report alongside a counter value.
PERF_UNITS = {
    "msec": 1e-3,
    "ns": 1e-9,
}
# Metrics measured via getrusage.
# NOTE(review): ru_maxrss is KiB on Linux; factor 1000 treats it as kB — confirm intended.
RUSAGE_METRICS = {
    "maxrss": RusageMetric("ru_maxrss", factor=1000, unit="B"), # KiB on linux
}
ALL_METRICS = {**PERF_METRICS, **RUSAGE_METRICS}
def measure_perf(cmd: list[str], events: list[str]) -> dict[str, tuple[float, str]]:
    """Run `cmd` under `perf stat` and return {event name: (value, unit)}.

    Exits this process with the child's status if the command fails.
    """
    with tempfile.NamedTemporaryFile() as tmp:
        # perf writes its JSON report to tmp's path (-o); the command's own
        # stdout/stderr pass through untouched.
        cmd = [
            *["perf", "stat", "-j", "-o", tmp.name],
            *[arg for event in events for arg in ["-e", event]],
            *["--", *cmd],
        ]
        # Execute command
        env = os.environ.copy()
        env["LC_ALL"] = "C"  # or else perf may output syntactically invalid json
        result = subprocess.run(cmd, env=env)
        if result.returncode != 0:
            sys.exit(result.returncode)
        # Collect results. Reading through the still-open handle picks up what
        # perf wrote to the path (handle offset is still 0; assumes perf
        # truncates/overwrites the same inode -- TODO confirm).
        perf = {}
        for line in tmp:
            data = json.loads(line)
            # Non-counter lines (headers, metadata) lack these keys; skip them.
            if "event" in data and "counter-value" in data:
                perf[data["event"]] = float(data["counter-value"]), data["unit"]
        return perf
@dataclass
class Result:
category: str
value: float
unit: str | None
def fmt(self, topic: str) -> str:
metric = f"{topic}//{self.category}"
if self.unit is None:
return json.dumps({"metric": metric, "value": self.value})
return json.dumps({"metric": metric, "value": self.value, "unit": self.unit})
def measure(cmd: list[str], metrics: list[str]) -> list[Result]:
    """Run `cmd` once and collect the requested metrics via perf and rusage.

    Raises on unknown metric names; exits with the child's status if the
    command fails. Returns one Result per matching metric.
    """
    # Check args
    unknown_metrics = []
    for metric in metrics:
        if metric not in RUSAGE_METRICS and metric not in PERF_METRICS:
            unknown_metrics.append(metric)
    if unknown_metrics:
        raise Exception(f"unknown metrics: {', '.join(unknown_metrics)}")

    # Prepare perf events
    events: list[str] = []
    for metric in metrics:
        if info := PERF_METRICS.get(metric):
            events.append(info.event)

    # Measure. RUSAGE_CHILDREN reflects terminated children of this process,
    # which includes the command perf just ran.
    perf = measure_perf(cmd, events)
    rusage = resource.getrusage(resource.RUSAGE_CHILDREN)

    # Extract results
    results = []
    for metric in metrics:
        if info := PERF_METRICS.get(metric):
            if info.event in perf:
                value, unit = perf[info.event]
            else:
                # Without the corresponding permissions,
                # we only get access to the userspace versions of the counters.
                value, unit = perf[f"{info.event}:u"]
            # Scale by the unit perf reported if we recognize it, otherwise by
            # the metric's own fallback factor.
            value *= PERF_UNITS.get(unit, info.factor)
            results.append(Result(metric, value, info.unit))
        if info := RUSAGE_METRICS.get(metric):
            value = getattr(rusage, info.name) * info.factor
            results.append(Result(metric, value, info.unit))
    return results
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Measure resource usage of a command using perf and rusage."
    )
    parser.add_argument(
        "-t",
        "--topic",
        action="append",
        default=[],
        help="topic prefix for the metrics",
    )
    parser.add_argument(
        "-m",
        "--metric",
        action="append",
        default=[],
        help=f"metrics to measure. Can be specified multiple times. Available metrics: {', '.join(sorted(ALL_METRICS))}",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=Path() / "measurements.jsonl",
    )
    parser.add_argument(
        "cmd",
        nargs="*",
        help="command to measure the resource usage of",
    )
    args = parser.parse_args()
    topics: list[str] = args.topic
    metrics: list[str] = args.metric
    output: Path = args.output
    cmd: list[str] = args.cmd

    results = measure(cmd, metrics)
    # Each measurement is emitted once per topic prefix, appended in JSON
    # Lines format so repeated invocations accumulate into one file.
    with open(output, "a") as f:
        for result in results:
            for topic in topics:
                f.write(f"{result.fmt(topic)}\n")

View File

@@ -1,108 +0,0 @@
#!/usr/bin/env python3
import argparse
import json
import subprocess
import sys
from contextlib import contextmanager
from dataclasses import dataclass
from pathlib import Path
REPO = Path()
OUTFILE = REPO / "measurements.jsonl"
OUTFILE_TMP = REPO / "measurements_repeated_tmp.jsonl"
@dataclass
class Measurement:
metric: str
value: float
unit: str | None
@classmethod
def from_json_str(cls, s: str) -> "Measurement":
data = json.loads(s.strip())
return cls(data["metric"], data["value"], data.get("unit"))
def to_json_str(self) -> str:
if self.unit is None:
return json.dumps({"metric": self.metric, "value": self.value})
return json.dumps(
{"metric": self.metric, "value": self.value, "unit": self.unit}
)
@contextmanager
def temporarily_move_outfile():
    """Park any existing OUTFILE at OUTFILE_TMP for the duration of the block.

    Inside the block the command under test writes a fresh OUTFILE. Callers
    must read that file before the block exits: the finally-rename restores
    the original OUTFILE on top of it, discarding the per-run file.
    """
    if OUTFILE_TMP.exists():
        # A leftover tmp file suggests a previous run died mid-flight; refuse
        # to proceed rather than silently overwrite its data.
        raise Exception(f"{OUTFILE_TMP} already exists")
    OUTFILE.touch()  # ensure the rename below succeeds even on a first run
    OUTFILE.rename(OUTFILE_TMP)
    try:
        yield
    finally:
        OUTFILE_TMP.rename(OUTFILE)
def read_measurements_from_outfile() -> list[Measurement]:
    """Parse every JSON Lines record in OUTFILE into a Measurement."""
    with open(OUTFILE, "r") as f:
        return [Measurement.from_json_str(line) for line in f]


def write_measurements_to_outfile(measurements: list[Measurement]) -> None:
    """Append the given measurements to OUTFILE, one JSON record per line."""
    serialized = "".join(f"{m.to_json_str()}\n" for m in measurements)
    with open(OUTFILE, "a") as f:
        f.write(serialized)
def run_once(cmd: list[str]) -> list[Measurement]:
    """Run `cmd` once and return the measurements it wrote to OUTFILE.

    The pre-existing OUTFILE is parked aside so the command starts from an
    empty file; it is restored afterwards (see temporarily_move_outfile).
    Exits this process with the child's status if the command fails.
    """
    with temporarily_move_outfile():
        proc = subprocess.run(cmd)
        if proc.returncode != 0:
            sys.exit(proc.returncode)
        # Must be read inside the with-block, before the original OUTFILE is
        # restored over the per-run file.
        return read_measurements_from_outfile()
def repeatedly(cmd: list[str], iterations: int) -> list[Measurement]:
    """Run `cmd` `iterations` times and average the measurements per metric.

    The most recently seen Measurement object for each metric is kept (so its
    unit wins); values are summed across runs and divided by `iterations`,
    even for metrics that did not appear in every run.
    """
    totals: dict[str, Measurement] = {}
    for _ in range(iterations):
        for current in run_once(cmd):
            previous = totals.get(current.metric)
            if previous is not None:
                current.value += previous.value
            totals[current.metric] = current
    averaged = list(totals.values())
    for measurement in averaged:
        measurement.value /= iterations
    return averaged
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=f"Repeatedly run a command, averaging the resulting measurements in {OUTFILE.name}.",
    )
    parser.add_argument(
        "-n",
        "--iterations",
        type=int,
        default=5,
        help="number of iterations",
    )
    parser.add_argument(
        "cmd",
        nargs="*",
        help="command to repeatedly run",
    )
    args = parser.parse_args()
    iterations: int = args.iterations
    cmd: list[str] = args.cmd

    # Average over all runs, then append the result to the restored OUTFILE.
    measurements = repeatedly(cmd, iterations)
    write_measurements_to_outfile(measurements)

View File

@@ -1,8 +0,0 @@
#!/usr/bin/env bash
# Run the radar benchmark suite, one benchmark per sub-directory.
set -euo pipefail

echo "Running benchmark: build"
tests/bench-radar/build/run

# The size benchmark expects to run after the build benchmark (it measures
# the files that build produced).
echo "Running benchmark: size"
tests/bench-radar/size/run

View File

@@ -1,42 +0,0 @@
# The `size` benchmark
This benchmark measures the number and size of a few kinds of files.
It expects to be executed after the `build` benchmark.
The following general metrics are collected:
- `size/libleanshared.so//bytes`
- `size/libleanshared.so//dynamic symbols`
- `size/libLake_shared.so//dynamic symbols`
The following metrics are collected from the entire build process:
- `size/all/.c//files`
- `size/all/.c//lines`
- `size/all/.cpp//files`
- `size/all/.cpp//lines`
- `size/all/.lean//files`
- `size/all/.lean//lines`
- `size/all/.ilean//files`
- `size/all/.ilean//bytes`
- `size/all/.olean//files`
- `size/all/.olean//bytes`
- `size/all/.olean.server//files`
- `size/all/.olean.server//bytes`
- `size/all/.olean.private//files`
- `size/all/.olean.private//bytes`
- `size/all/.ir//files`
- `size/all/.ir//bytes`
The following metrics are collected only for the `Init` library:
- `size/init/.olean//files`
- `size/init/.olean//bytes`
- `size/init/.olean.server//files`
- `size/init/.olean.server//bytes`
- `size/init/.olean.private//files`
- `size/init/.olean.private//bytes`
The following metric measures the size of all files produced by a `make install`.
- `size/install//bytes`

View File

@@ -1,95 +0,0 @@
#!/usr/bin/env python3
import json
import subprocess
from pathlib import Path
from typing import Iterable
OUTFILE = Path() / "measurements.jsonl"
SRC = Path("src")
STAGE3 = Path("build/release/stage3")
STAGE3_TEMP = STAGE3 / "lib" / "temp"
STAGE3_LEAN = STAGE3 / "lib" / "lean"
def output_result(metric: str, value: float, unit: str | None = None) -> None:
    """Append one measurement record to OUTFILE in JSON Lines format."""
    record: dict = {"metric": metric, "value": value}
    if unit is not None:
        record["unit"] = unit
    with open(OUTFILE, "a") as f:
        f.write(f"{json.dumps(record)}\n")
def measure_bytes(topic: str, paths: Iterable[Path]) -> None:
    """Report the file count and combined byte size of `paths`."""
    sizes = [p.stat().st_size for p in paths]
    output_result(f"{topic}//files", len(sizes))
    output_result(f"{topic}//bytes", sum(sizes), "B")


def measure_lines(topic: str, paths: Iterable[Path]) -> None:
    """Report the file count and combined line count of `paths`."""
    line_counts = []
    for p in paths:
        with open(p) as f:
            line_counts.append(sum(1 for _ in f))
    output_result(f"{topic}//files", len(line_counts))
    output_result(f"{topic}//lines", sum(line_counts))
def measure_bytes_for_file(topic: str, path: Path) -> int:
    """Report and return the size of a single file in bytes."""
    size = path.stat().st_size
    output_result(f"{topic}//bytes", size, "B")
    return size


def measure_bytes_for_dir(topic: str, path: Path) -> int:
    """Report and return the combined byte size of all files under `path`."""
    # Use a distinct loop variable: the original shadowed the `path` parameter
    # inside the loop, which is error-prone if `path` is used after the loop.
    total = 0
    for entry in path.rglob("*"):
        if entry.is_file():
            total += entry.stat().st_size
    output_result(f"{topic}//bytes", total, "B")
    return total
def measure_symbols_for_file(topic: str, path: Path) -> int:
    """Report and return the number of exported (dynamic) symbols in `path`.

    Counts the lines `nm` prints for symbols that are both external and
    defined in the binary itself; raises CalledProcessError if nm fails.
    """
    result = subprocess.run(
        ["nm", "--extern-only", "--defined-only", path],
        capture_output=True,
        encoding="utf-8",
        check=True,
    )
    count = len(result.stdout.splitlines())
    output_result(f"{topic}//dynamic symbols", count)
    return count
if __name__ == "__main__":
    # Shared libraries: byte size and exported-symbol counts.
    measure_bytes_for_file("size/libleanshared.so", STAGE3_LEAN / "libleanshared.so")
    measure_symbols_for_file("size/libleanshared.so", STAGE3_LEAN / "libleanshared.so")
    measure_symbols_for_file(
        "size/libLake_shared.so", STAGE3_LEAN / "libLake_shared.so"
    )
    # Everything placed under the stage3 install tree.
    measure_bytes_for_dir("size/install", STAGE3 / "install")

    # Stdlib
    measure_lines("size/all/.c", STAGE3_TEMP.glob("**/*.c"))
    measure_bytes("size/all/.ir", STAGE3_LEAN.glob("**/*.ir"))
    measure_lines("size/all/.cpp", SRC.glob("**/*.cpp"))
    measure_lines("size/all/.lean", SRC.glob("**/*.lean"))
    measure_bytes("size/all/.ilean", STAGE3_LEAN.glob("**/*.ilean"))
    measure_bytes("size/all/.olean", STAGE3_LEAN.glob("**/*.olean"))
    measure_bytes("size/all/.olean.server", STAGE3_LEAN.glob("**/*.olean.server"))
    measure_bytes("size/all/.olean.private", STAGE3_LEAN.glob("**/*.olean.private"))
    # Init only (subset of the size/all/.olean* numbers above)
    measure_bytes("size/init/.olean", STAGE3_LEAN.glob("Init/**/*.olean"))
    measure_bytes("size/init/.olean.server", STAGE3_LEAN.glob("Init/**/*.olean.server"))
    measure_bytes(
        "size/init/.olean.private", STAGE3_LEAN.glob("Init/**/*.olean.private")
    )

View File

@@ -1 +0,0 @@
run

View File

@@ -1,4 +1,3 @@
/build
*.out
*.lean.c
*.lean.linked.bc

View File

@@ -1 +0,0 @@
14

View File

@@ -1,7 +1,8 @@
# The `build` benchmark
This benchmark executes a complete build of the stage3 stdlib
and collects global and per-module metrics.
This benchmark executes a complete build of the stage3 stdlib from stage2 and
collects global and per-module metrics. This is different from most other
benchmarks, which benchmark the stage the bench suite is being executed in.
The following metrics are collected by a wrapper around the entire build process:

View File

@@ -0,0 +1 @@
../../lean_wrapper.py

View File

@@ -0,0 +1,39 @@
#!/usr/bin/env python3
# Render an HTML lakeprof report for the current commit and upload it (plus
# the raw lakeprof artifacts) to speed.lean-lang.org.
import json
import subprocess
import sys
from pathlib import Path

# Determine paths relative to the current file.
script_file = Path(__file__)
src_dir = script_file.parent.parent.parent.parent / "src"
template_file = script_file.parent / "lakeprof_report_template.html"
def run_stdout(*command: str, cwd: Path | None = None) -> str:
result = subprocess.run(command, capture_output=True, encoding="utf-8", cwd=cwd)
if result.returncode != 0:
print(result.stdout, end="", file=sys.stdout)
print(result.stderr, end="", file=sys.stderr)
sys.exit(result.returncode)
return result.stdout
# The commit being benchmarked; the upload location is keyed on it.
sha = run_stdout("git", "rev-parse", "@", cwd=src_dir).strip()
base_url = f"https://speed.lean-lang.org/lean4-out/{sha}"

# Splice the lakeprof report and base URL into the HTML template.
report = run_stdout("lakeprof", "report", "-prc", cwd=src_dir)
template = template_file.read_text()
template = template.replace("__BASE_URL__", json.dumps(base_url))
template = template.replace("__LAKEPROF_REPORT__", report)
(src_dir / "index.html").write_text(template)


def upload(file: Path) -> None:
    # curl -f: fail on HTTP errors; -T: upload (PUT) the file to the URL.
    subprocess.run(["curl", "-fT", file, f"{base_url}/{file.name}"], check=True)


upload(src_dir / "index.html")
upload(src_dir / "lakeprof.log")
upload(src_dir / "lakeprof.trace_event")

View File

@@ -2,24 +2,27 @@
import argparse
import json
import os
import re
import subprocess
import sys
from collections import Counter
from pathlib import Path
NAME = "build"
REPO = Path("..")
BENCH = REPO / "tests" / "bench-radar"
STAGE2 = REPO / "build" / "release" / "stage2"
OUT = REPO / "measurements.jsonl"
# Global paths
TEST_DIR = Path(os.environ["TEST_DIR"])
WRAPPER_OUT = Path(os.environ["WRAPPER_OUT"])
WRAPPER_PREFIX = Path(os.environ["WRAPPER_PREFIX"])
# Other config
BENCHMARK = "build"
def save_result(metric: str, value: float, unit: str | None = None) -> None:
def save_measurement(metric: str, value: float, unit: str | None = None) -> None:
data = {"metric": metric, "value": value}
if unit is not None:
data["unit"] = unit
with open(OUT, "a") as f:
with open(WRAPPER_OUT, "a") as f:
f.write(f"{json.dumps(data)}\n")
@@ -46,7 +49,7 @@ def get_module(setup: Path) -> str:
def count_lines(module: str, path: Path) -> None:
with open(path) as f:
lines = sum(1 for _ in f)
save_result(f"{NAME}/module/{module}//lines", lines)
save_measurement(f"{BENCHMARK}/module/{module}//lines", lines)
def count_bytes(module: str, path: Path, suffix: str) -> None:
@@ -54,18 +57,18 @@ def count_bytes(module: str, path: Path, suffix: str) -> None:
bytes = path.with_suffix(suffix).stat().st_size
except FileNotFoundError:
return
save_result(f"{NAME}/module/{module}//bytes {suffix}", bytes, "B")
save_measurement(f"{BENCHMARK}/module/{module}//bytes {suffix}", bytes, "B")
def run_lean(module: str) -> None:
stdout, stderr = run_capture(
f"{BENCH}/measure.py",
*("-t", f"{NAME}/module/{module}"),
*("-o", f"{OUT}"),
f"{TEST_DIR}/measure.py",
*("-t", f"{BENCHMARK}/module/{module}"),
*("-o", f"{WRAPPER_OUT}", "-a"),
*("-m", "instructions"),
*("-m", "cycles"),
"--",
f"{STAGE2}/bin/lean.wrapped",
"lean",
*("--profile", "-Dprofiler.threshold=9999999"),
"--stat",
*sys.argv[1:],
@@ -79,7 +82,7 @@ def run_lean(module: str) -> None:
seconds = float(match.group(2))
if match.group(3) == "ms":
seconds = seconds / 1000
save_result(f"{NAME}/profile/{name}//wall-clock", seconds, "s")
save_measurement(f"{BENCHMARK}/profile/{name}//wall-clock", seconds, "s")
# Output of `lean --stat`
stat = Counter[str]()
@@ -91,12 +94,20 @@ def run_lean(module: str) -> None:
for name, count in stat.items():
if count > 0:
if name.endswith("bytes"):
save_result(f"{NAME}/stat/{name}//bytes", count, "B")
save_measurement(f"{BENCHMARK}/stat/{name}//bytes", count, "B")
else:
save_result(f"{NAME}/stat/{name}//amount", count)
save_measurement(f"{BENCHMARK}/stat/{name}//amount", count)
def main() -> None:
if sys.argv[1:] == ["--print-prefix"]:
print(WRAPPER_PREFIX)
return
if sys.argv[1:] == ["--githash"]:
run("lean", "--githash")
return
parser = argparse.ArgumentParser()
parser.add_argument("lean", type=Path)
parser.add_argument("--setup", type=Path)

72
tests/bench/build/run_bench Executable file
View File

@@ -0,0 +1,72 @@
#!/usr/bin/env bash
# Benchmark a full build of the next stage's stdlib using the current stage.
source ../../env_bench.sh
source "$TEST_DIR/util.sh"

# This benchmark builds stage N+1 from within stage N's bench environment.
STAGE_THIS="stage$STAGE"
STAGE_NEXT="stage$((STAGE + 1))"
BUILD_ROOT="$(realpath "$BUILD_DIR/..")"
BUILD_THIS="$(realpath "$BUILD_ROOT/$STAGE_THIS")"
BUILD_NEXT="$(realpath "$BUILD_ROOT/$STAGE_NEXT")"
OUT="$(realpath measurements.jsonl)"
# NOTE(review): BUILD_THIS is defined but not referenced below -- confirm
# whether util.sh or a sourced file still needs it.

echo
echo ">"
echo "> Configuring $STAGE_NEXT..."
echo ">"

# Building a stage mostly affects files in that stage's build directory.
# However, the bench suite runs inside the source directory for developer UX
# reasons, so some stage-specific bench suite files are generated in the source
# directory (namely the env_*.sh files).
#
# To avoid messing up the rest of the bench suite, we restore those files to
# STAGE_THIS's versions immediately after we configure STAGE_NEXT. Yes, this is
# a big hack, but it allows running the build benchmark as part of the bench
# suite instead of completely separately.
#
# Configuring STAGE_NEXT also builds all stages up to and including STAGE_THIS.
make -C "$BUILD_ROOT" -j"$(nproc)" "$STAGE_NEXT-configure"
make -C "$BUILD_ROOT" -j"$(nproc)" "$STAGE_THIS-configure"

echo
echo ">"
echo "> Warming up $STAGE_NEXT..."
echo ">"

# Do one full build, then delete the .oleans so the measured build below has
# real work to do; discard any measurements the warmup produced.
make -C "$BUILD_NEXT" -j"$(nproc)"
find "$BUILD_NEXT/lib" -name "*.olean" -delete
rm -f measurements.jsonl

echo
echo ">"
echo "> Building $STAGE_NEXT..."
echo ">"

# LAKE_OVERRIDE_LEAN/LEAN route lean invocations through the fake_root
# wrapper; measure.py records whole-build metrics to $OUT; lakeprof records
# build timing data for the analysis step below.
LAKE_OVERRIDE_LEAN=true LEAN="$(realpath fake_root/bin/lean)" \
    WRAPPER_PREFIX="$(realpath fake_root)" WRAPPER_OUT="$OUT" \
    lakeprof record -- \
    "$TEST_DIR/measure.py" -t build -d -a -- \
    make -C "$BUILD_NEXT" -j"$(nproc)"

echo
echo ">"
echo "> Analyzing lakeprof data..."
echo ">"

# Lakeprof must be executed in the src dir because it obtains some metadata by
# calling lake in its current working directory.
mv lakeprof.log "$SRC_DIR"
pushd "$SRC_DIR"
# Extract the longest (re)build path's wall-clock seconds from the JSON
# report (last row, third field -- TODO confirm against lakeprof's format).
lakeprof report -pj | jq '{metric: "build/lakeprof/longest build path//wall-clock", value: .[-1][2], unit: "s"}' -c >> "$OUT"
lakeprof report -rj | jq '{metric: "build/lakeprof/longest rebuild path//wall-clock", value: .[-1][2], unit: "s"}' -c >> "$OUT"
popd

View File

@@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Thin wrapper that runs the lakeprof report upload inside the bench env.
source ../../env_bench.sh
source "$TEST_DIR/util.sh"

# This should run in the same environment as run_bench, otherwise `lakeprof`
# will use the `lake` from the global system `elan` install and not the one from
# the current commit.
#
# Once an elan with support for relative toolchains has been widely released and
# been adopted by this repo, this wrapper script should no longer be necessary
# and the upload script can be called directly.
./lakeprof_report_upload.py

View File

@@ -1 +0,0 @@
15

View File

@@ -1 +0,0 @@
9

View File

@@ -1,2 +0,0 @@
/build
/test

View File

@@ -1 +0,0 @@
ex-50-50-1.leq

View File

@@ -1 +0,0 @@
5000

View File

@@ -1 +0,0 @@
80

View File

@@ -1 +0,0 @@
100000

View File

@@ -1 +0,0 @@
100000 10

View File

@@ -0,0 +1,4 @@
# Size measurements
This benchmark measures the number and size of a few kinds of files
produced by the current stage's build.

107
tests/bench/size/measure_sizes.py Executable file
View File

@@ -0,0 +1,107 @@
import argparse
import json
import subprocess
from pathlib import Path

# CLI: measure_sizes.py <src> <build> <install> <output>
# (see run_bench, which passes "$SRC_DIR" "$BUILD_DIR" install measurements.jsonl)
parser = argparse.ArgumentParser()
parser.add_argument("src", type=Path)      # source tree, for .cpp/.lean line counts
parser.add_argument("build", type=Path)    # current stage's build directory
parser.add_argument("install", type=Path)  # DESTDIR of a `make install`
parser.add_argument("output", type=Path)   # measurements.jsonl to append to
args = parser.parse_args()
src: Path = args.src
build: Path = args.build
install: Path = args.install
output: Path = args.output

build_temp = build / "lib" / "temp"
build_lean = build / "lib" / "lean"
def output_measurement(
    topic: str,
    category: str,
    value: float,
    unit: str | None = None,
) -> None:
    """Append a `{topic}//{category}` record to the output JSON Lines file."""
    record: dict = {"metric": f"{topic}//{category}", "value": value}
    if unit is not None:
        record["unit"] = unit
    with open(output, "a") as f:
        f.write(f"{json.dumps(record)}\n")
def measure_bytes_for_file(topic: str, path: Path, count: bool = True) -> None:
    """Report the byte size of one file (plus a file count of 1 if `count`)."""
    # Renamed from `bytes`, which shadowed the builtin type.
    size = path.stat().st_size
    output_measurement(topic, "bytes", size, "B")
    if count:
        output_measurement(topic, "files", 1)


def measure_bytes(topic: str, *paths: Path, count: bool = True) -> None:
    """Report byte sizes for every regular file among `paths`; others are skipped."""
    for candidate in paths:
        if candidate.is_file():
            measure_bytes_for_file(topic, candidate, count=count)
def measure_lines_for_file(topic: str, path: Path, count: bool = True) -> None:
    """Report the line count of one text file (plus a file count of 1 if `count`)."""
    with path.open() as handle:
        line_total = sum(1 for _ in handle)
    output_measurement(topic, "lines", line_total)
    if count:
        output_measurement(topic, "files", 1)


def measure_lines(topic: str, *paths: Path, count: bool = True) -> None:
    """Report line counts for every regular file among `paths`; others are skipped."""
    for candidate in paths:
        if candidate.is_file():
            measure_lines_for_file(topic, candidate, count=count)
def measure_symbols_for_file(topic: str, path: Path, count: bool = True) -> None:
    """Report the number of exported (dynamic) symbols defined in a binary.

    Counts the lines `nm` prints for symbols that are both external and
    defined in the file itself; raises CalledProcessError if nm fails.
    """
    result = subprocess.run(
        ["nm", "--extern-only", "--defined-only", path],
        capture_output=True,
        encoding="utf-8",
        check=True,
    )
    symbols = len(result.stdout.splitlines())
    output_measurement(topic, "dynamic symbols", symbols)
    if count:
        output_measurement(topic, "files", 1)


def measure_symbols(topic: str, *paths: Path, count: bool = True) -> None:
    # Skip paths that do not exist or are not regular files.
    for path in paths:
        if path.is_file():
            measure_symbols_for_file(topic, path, count=count)
# Make sure not to measure things that depend on other tests or benchmarks (like
# the tests/compile binary size) since you can't rely on the order the tests or
# benchmarks are executed in.

# Misc
# count=False: these are single well-known files, so a "files" metric is noise.
measure_bytes("size/libleanshared.so", build_lean / "libleanshared.so", count=False)
measure_symbols("size/libleanshared.so", build_lean / "libleanshared.so", count=False)
measure_symbols("size/libLake_shared.so", build_lean / "libLake_shared.so", count=False)
measure_bytes("size/install", *install.rglob("*"))

# Stdlib
measure_lines("size/all/.c", *build_temp.rglob("*.c"))
measure_bytes("size/all/.ir", *build_lean.rglob("*.ir"))
measure_lines("size/all/.cpp", *src.rglob("*.cpp"))
measure_lines("size/all/.lean", *src.rglob("*.lean"))
measure_bytes("size/all/.ilean", *build_lean.rglob("*.ilean"))
measure_bytes("size/all/.olean", *build_lean.rglob("*.olean"))
measure_bytes("size/all/.olean.server", *build_lean.rglob("*.olean.server"))
measure_bytes("size/all/.olean.private", *build_lean.rglob("*.olean.private"))
# Init only (subset of the size/all/.olean* numbers above)
measure_bytes("size/Init/.olean", *build_lean.glob("Init/**/*.olean"))
measure_bytes("size/Init/.olean.server", *build_lean.glob("Init/**/*.olean.server"))
measure_bytes("size/Init/.olean.private", *build_lean.glob("Init/**/*.olean.private"))

7
tests/bench/size/run_bench Executable file
View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
source ../../env_bench.sh
source "$TEST_DIR/util.sh"

# Stage an install into a scratch DESTDIR, measure the resulting file sizes,
# then clean up the scratch directory.
make -C "$BUILD_DIR" install DESTDIR="$(realpath install)"
# Use python3 explicitly for consistency with the repo's other scripts;
# a bare `python` is not guaranteed to exist on all systems.
python3 measure_sizes.py "$SRC_DIR" "$BUILD_DIR" install measurements.jsonl
rm -rf install

View File

@@ -1,703 +0,0 @@
- attributes:
description: Init.Prelude async
tags: [other]
time: &time
# runner: time
# Alternative config: use `perf stat` to also collect extended properties.
runner: perf_stat
perf_stat:
properties:
[
"wall-clock",
"task-clock",
"instructions",
"branches",
"branch-misses",
]
rusage_properties: ["maxrss"]
run_config:
<<: *time
cwd: ../../src
cmd: lean Init/Prelude.lean
- attributes:
description: Init.Data.List.Sublist async
tags: [other]
run_config:
<<: *time
cwd: ../../src
cmd: lean Init/Data/List/Sublist.lean
- attributes:
description: Std.Data.Internal.List.Associative
tags: [other]
run_config:
<<: *time
cwd: ../../src
cmd: lean Std/Data/Internal/List/Associative.lean
- attributes:
description: Std.Data.DHashMap.Internal.RawLemmas
tags: [other]
run_config:
<<: *time
cwd: ../../src
cmd: lean Std/Data/DHashMap/Internal/RawLemmas.lean
- attributes:
description: Init.Data.BitVec.Lemmas
tags: [other]
run_config:
<<: *time
cwd: ../../src
cmd: lean Init/Data/BitVec/Lemmas.lean
- attributes:
description: Init.Data.List.Sublist re-elab -j4
tags: [other]
run_config:
<<: *time
cwd: ../../src
cmd: lean --run ../script/benchReelabRss.lean lean Init/Data/List/Sublist.lean 10 -j4
max_runs: 2
parse_output: true
- attributes:
description: Init.Data.BitVec.Lemmas re-elab
tags: [other]
run_config:
<<: *time
cwd: ../../src
cmd: lean --run ../script/benchReelabRss.lean lean Init/Data/BitVec/Lemmas.lean 3 -j4
max_runs: 2
parse_output: true
- attributes:
description: Init.Data.List.Sublist re-elab -j4 (watchdog rss)
tags: [other]
run_config:
<<: *time
cwd: ../../src
cmd: lean --run ../script/benchReelabWatchdogRss.lean lean Init/Data/List/Sublist.lean 10 -j4
max_runs: 2
parse_output: true
# This benchmark uncovered the promise cycle in `realizeConst` (#11328)
- attributes:
description: Init.Data.List.Basic re-elab
tags: [other]
run_config:
<<: *time
cwd: ../../src
cmd: lean --run ../script/benchReelabRss.lean lean Init/Data/List/Basic.lean 10 -j4
max_runs: 2
parse_output: true
- attributes:
description: import Lean
tags: [other]
run_config:
<<: *time
cwd: ../../src
cmd: lean Lean.lean
- attributes:
description: tests/compiler
tags: [other]
run_config:
cwd: ../compiler/
cmd: |
set -eu
for f in *.lean; do ../bench/compile.sh $f > /dev/null; done
printf 'sum binary sizes: '
for f in *.lean; do printf '%s\0' "$f.out"; done | wc -c --files0-from=- | tail -1 | cut -d' ' -f 1
max_runs: 1
runner: output
- attributes:
description: tests/bench/ interpreted
tags: [other]
run_config:
<<: *time
cmd: |
bash -c '
set -euxo pipefail
ulimit -s unlimited
for f in *.args; do
lean --run ${f%.args} $(cat $f)
done
'
max_runs: 2
- attributes:
description: binarytrees
tags: [other]
run_config:
<<: *time
cmd: ./binarytrees.lean.out 21
build_config:
cmd: ./compile.sh binarytrees.lean
- attributes:
description: binarytrees.st
tags: [other]
run_config:
<<: *time
cmd: ./binarytrees.st.lean.out 21
build_config:
cmd: ./compile.sh binarytrees.st.lean
- attributes:
description: const_fold
tags: [other]
run_config:
<<: *time
cmd: bash -c "ulimit -s unlimited && ./const_fold.lean.out 23"
build_config:
cmd: ./compile.sh const_fold.lean
- attributes:
description: deriv
tags: [other]
run_config:
<<: *time
cmd: ./deriv.lean.out 10
build_config:
cmd: ./compile.sh deriv.lean
- attributes:
description: lake build clean
tags: [other]
run_config:
<<: *time
cmd: |
bash -c "
set -ex
ulimit -s unlimited
cd inundation
lake -flakefile-clean.lean clean
lake -flakefile-clean.lean build
"
max_runs: 2
build_config:
cmd: |
bash -c "
set -ex
ulimit -s unlimited
cd inundation
cp lakefile.lean lakefile-clean.lean
lake -flakefile-clean.lean -Ktest=Clean run mkBuild
lake -flakefile-clean.lean build
"
- attributes:
description: lake build no-op
tags: [other]
run_config:
<<: *time
cmd: |
bash -c "
set -ex
ulimit -s unlimited
lake -dinundation -flakefile-nop.lean build
"
build_config:
cmd: |
bash -c "
set -ex
ulimit -s unlimited
cd inundation
cp lakefile.lean lakefile-nop.lean
lake -flakefile-nop.lean -Ktest=Nop run mkBuild
lake -flakefile-nop.lean build
"
- attributes:
description: lake config elab
tags: [other]
run_config:
<<: *time
cmd: |
bash -c "
set -ex
ulimit -s unlimited
lake -dinundation -flakefile-rc.lean -R run nop
"
build_config:
cmd: cp inundation/lakefile.lean inundation/lakefile-rc.lean
- attributes:
description: lake config import
tags: [other]
run_config:
<<: *time
cmd: |
bash -c "
set -ex
ulimit -s unlimited
lake -dinundation run nop
"
build_config:
cmd: |
bash -c "
set -ex
ulimit -s unlimited
lake -dinundation run nop
"
- attributes:
description: lake config tree
tags: [other]
run_config:
<<: *time
cmd: |
bash -c "
set -ex
ulimit -s unlimited
lake -dinundation/test/tree run nop
"
build_config:
cmd: |
lake -dinundation run mkTree
lake -dinundation/test/tree update
- attributes:
description: lake env
tags: [other]
run_config:
<<: *time
cmd: |
bash -c "
set -ex
ulimit -s unlimited
lake -dinundation env true
"
build_config:
cmd: lake -dinundation env true
- attributes:
description: lake startup
tags: [other]
run_config:
<<: *time
cmd: |
bash -c "
set -ex
ulimit -s unlimited
lake self-check
"
- attributes:
description: language server startup
tags: [other]
build_config:
cmd: ./compile.sh server_startup.lean
run_config:
<<: *time
cmd: ./server_startup.lean.out
- attributes:
description: language server startup with ileans
tags: [other]
build_config:
cmd: ./compile.sh watchdogRss.lean
run_config:
<<: *time
cmd: ./watchdogRss.lean.out
- attributes:
description: ilean roundtrip
tags: [other]
run_config:
<<: *time
cmd: ./ilean_roundtrip.lean.out 200000
parse_output: true
build_config:
cmd: ./compile.sh ilean_roundtrip.lean
- attributes:
description: identifier auto-completion
tags: [other]
run_config:
<<: *time
cmd: lean -Dlinter.all=false --run identifier_completion_runner.lean
parse_output: true
- attributes:
description: liasolver
tags: [other]
run_config:
<<: *time
cmd: ./liasolver.lean.out ex-50-50-1.leq
build_config:
cmd: ./compile.sh liasolver.lean
- attributes:
description: parser
tags: [other]
run_config:
<<: *time
cmd: ./parser.lean.out ../../src/Init/Prelude.lean 50
build_config:
cmd: ./compile.sh parser.lean
- attributes:
description: qsort
tags: [other]
run_config:
<<: *time
cmd: ./qsort.lean.out 400
build_config:
cmd: ./compile.sh qsort.lean
- attributes:
description: rbmap
tags: [other]
run_config:
<<: *time
cmd: ./rbmap.lean.out 2000000
build_config:
cmd: ./compile.sh rbmap.lean
- attributes:
description: rbmap_1
tags: [other]
run_config:
<<: *time
cmd: ./rbmap_checkpoint.lean.out 2000000 1
build_config:
cmd: ./compile.sh rbmap_checkpoint.lean
- attributes:
description: rbmap_10
tags: [other]
run_config:
<<: *time
cmd: ./rbmap_checkpoint.lean.out 2000000 10
build_config:
cmd: ./compile.sh rbmap_checkpoint.lean
- attributes:
description: rbmap_fbip
tags: [other]
run_config:
<<: *time
cmd: ./rbmap_fbip.lean.out 2000000
build_config:
cmd: ./compile.sh rbmap_fbip.lean
- attributes:
description: rbmap_library
tags: [other]
run_config:
<<: *time
cmd: ./rbmap_library.lean.out 2000000
build_config:
cmd: ./compile.sh rbmap_library.lean
- attributes:
description: reduceMatch
tags: [other]
run_config:
<<: *time
cmd: lean reduceMatch.lean
- attributes:
description: simp_arith1
tags: [other]
run_config:
<<: *time
cmd: lean simp_arith1.lean
- attributes:
description: simp_bubblesort_256
tags: [other]
run_config:
<<: *time
cmd: lean simp_bubblesort_256.lean
- attributes:
description: simp_local
tags: [other]
run_config:
<<: *time
cmd: lean simp_local.lean
- attributes:
description: simp_subexpr
tags: [other]
run_config:
<<: *time
cmd: lean simp_subexpr.lean
- attributes:
description: simp_congr
tags: [other]
run_config:
<<: *time
cmd: lean --tstack=16384 simp_congr.lean
- attributes:
description: mut_rec_wf
tags: [other]
run_config:
<<: *time
cmd: lean mut_rec_wf.lean
- attributes:
description: big_match
tags: [other]
run_config:
<<: *time
cmd: lean big_match.lean
- attributes:
description: big_match_partial
tags: [other]
run_config:
<<: *time
cmd: lean big_match_partial.lean
- attributes:
description: big_match_nat
tags: [other]
run_config:
<<: *time
cmd: lean big_match_nat.lean
- attributes:
description: big_match_nat_split
tags: [other]
run_config:
<<: *time
cmd: lean big_match_nat_split.lean
- attributes:
description: big_beq
tags: [other]
run_config:
<<: *time
cmd: lean big_beq.lean
- attributes:
description: big_beq_rec
tags: [other]
run_config:
<<: *time
cmd: lean big_beq_rec.lean
- attributes:
description: big_deceq
tags: [other]
run_config:
<<: *time
cmd: lean big_deceq.lean
- attributes:
description: big_deceq_rec
tags: [other]
run_config:
<<: *time
cmd: lean big_deceq_rec.lean
- attributes:
description: nat_repr
tags: [other]
run_config:
<<: *time
cmd: ./nat_repr.lean.out 5000
build_config:
cmd: ./compile.sh nat_repr.lean
- attributes:
description: big_struct
tags: [other]
run_config:
<<: *time
cmd: lean big_struct.lean
- attributes:
description: big_struct_dep1
tags: [other]
run_config:
<<: *time
cmd: lean big_struct_dep1.lean
- attributes:
description: big_struct_dep
tags: [other]
run_config:
<<: *time
cmd: lean big_struct_dep.lean
- attributes:
description: unionfind
tags: [other]
run_config:
<<: *time
cmd: ./unionfind.lean.out 3000000
build_config:
cmd: ./compile.sh unionfind.lean
- attributes:
description: workspaceSymbols
tags: [other]
run_config:
<<: *time
cmd: lean workspaceSymbols.lean
max_runs: 2
- attributes:
description: charactersIn
tags: [other]
run_config:
<<: *time
cmd: lean charactersIn.lean
max_runs: 2
- attributes:
description: bv_decide_realworld
tags: [other]
run_config:
<<: *time
cmd: lean bv_decide_realworld.lean
- attributes:
description: bv_decide_mul
tags: [other]
run_config:
<<: *time
cmd: lean bv_decide_mul.lean
- attributes:
description: bv_decide_mod
tags: [other]
run_config:
<<: *time
cmd: lean bv_decide_mod.lean
max_runs: 2
- attributes:
description: bv_decide_inequality.lean
tags: [other]
run_config:
<<: *time
cmd: lean bv_decide_inequality.lean
discarded_runs: 1
max_runs: 2
- attributes:
description: bv_decide_large_aig.lean
tags: [other]
run_config:
<<: *time
cmd: lean bv_decide_large_aig.lean
- attributes:
description: bv_decide_rewriter.lean
tags: [other]
run_config:
<<: *time
cmd: lean bv_decide_rewriter.lean
- attributes:
description: big_do
tags: [other]
run_config:
<<: *time
cmd: lean big_do.lean
- attributes:
description: big_omega.lean
tags: [other]
run_config:
<<: *time
cmd: lean big_omega.lean
- attributes:
description: big_omega.lean MT
tags: [other]
run_config:
<<: *time
cmd: lean big_omega.lean -Dinternal.cmdlineSnapshots=false
- attributes:
description: omega_stress.lean async
tags: [other]
run_config:
<<: *time
cmd: lean omega_stress.lean
- attributes:
description: channel.lean
tags: [other]
run_config:
<<: *time
cmd: ./channel.lean.out
parse_output: true
build_config:
cmd: ./compile.sh channel.lean
- attributes:
description: riscv-ast.lean
tags: [other]
run_config:
<<: *time
cmd: lean riscv-ast.lean
max_runs: 2
- attributes:
description: iterators (compiled)
tags: [other]
run_config:
<<: *time
cmd: ./iterators.lean.out
build_config:
cmd: ./compile.sh iterators.lean
- attributes:
description: iterators (interpreted)
tags: [other]
run_config:
<<: *time
cmd: lean --run iterators.lean
- attributes:
description: iterators (elab)
tags: [other]
run_config:
<<: *time
cmd: lean iterators.lean
- attributes:
description: sigma iterator
tags: [other]
run_config:
<<: *time
cmd: ./sigmaIterator.lean.out
build_config:
cmd: ./compile.sh sigmaIterator.lean
- attributes:
description: workspaceSymbols with new ranges
tags: [other]
run_config:
<<: *time
cmd: ./workspaceSymbolsNewRanges.lean.out
build_config:
cmd: ./compile.sh workspaceSymbolsNewRanges.lean
- attributes:
description: hashmap.lean
tags: [other]
run_config:
<<: *time
cmd: ./hashmap.lean.out 11 10000
parse_output: true
build_config:
cmd: ./compile.sh hashmap.lean
- attributes:
description: treemap.lean
tags: [other]
run_config:
<<: *time
cmd: ./treemap.lean.out 11 10000
parse_output: true
build_config:
cmd: ./compile.sh treemap.lean
- attributes:
description: phashmap.lean
tags: [other]
run_config:
<<: *time
cmd: ./phashmap.lean.out 11 10000
parse_output: true
build_config:
cmd: ./compile.sh phashmap.lean
- attributes:
description: grind_bitvec2.lean
tags: [other]
run_config:
<<: *time
cmd: lean ../lean/run/grind_bitvec2.lean
- attributes:
description: grind_list2.lean
tags: [other]
run_config:
<<: *time
cmd: lean ../lean/run/grind_list2.lean
- attributes:
description: grind_ring_5.lean
tags: [other]
run_config:
<<: *time
cmd: lean ../lean/run/grind_ring_5.lean
- attributes:
description: leanchecker --fresh Init
tags: [other]
run_config:
<<: *time
cmd: leanchecker --fresh Init
max_runs: 1
- attributes:
description: cbv tactic (leroy compiler verification course)
tags: [other]
run_config:
<<: *time
cmd: lean ./cbv/leroy.lean
- attributes:
description: cbv tactic (prime filter)
tags: [other]
run_config:
<<: *time
cmd: lean ./cbv/divisors.lean
- attributes:
description: cbv tactic (removing duplicates from the list)
tags: [other]
run_config:
<<: *time
cmd: lean ./cbv/dedup.lean
- attributes:
description: cbv tactic (evaluating Decidable.decide)
tags: [other]
run_config:
<<: *time
cmd: lean ./cbv/decide.lean
- attributes:
description: cbv tactic (evaluating List.mergeSort)
tags: [other]
run_config:
<<: *time
cmd: lean ./cbv/merge_sort.lean
- attributes:
description: cbv tactic (System F normalization)
tags: [other]
run_config:
<<: *time
cmd: lean ./cbv/system_f.lean

View File

@@ -1 +0,0 @@
70000

80
tests/combine.py Executable file
View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python3
import argparse
import json
import sys
from pathlib import Path
from typing import Any
def add_measurement(
values: dict[str, float],
units: dict[str, str | None],
data: dict[str, Any],
) -> None:
metric = data["metric"]
values[metric] = values.get(metric, 0) + data["value"]
units[metric] = data.get("unit")
def format_measurement(
values: dict[str, float],
units: dict[str, str | None],
name: str,
) -> dict[str, Any]:
value = values[name]
unit = units.get(name)
data: dict[str, Any] = {"metric": name, "value": value}
if unit is not None:
data["unit"] = unit
return data
def main() -> None:
    """CLI entry point: merge one or more JSON Lines measurement files."""
    parser = argparse.ArgumentParser(
        description="Combine measurement files in the JSON Lines format, summing duplicated measurements like radar does.",
    )
    parser.add_argument(
        "input",
        nargs="*",
        default=[],
        help="input files to read measurements from. If none are specified, measurements are read from stdin.",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        help="output file to write measurements to. If not specified, the result is printed to stdout.",
    )
    args = parser.parse_args()
    inputs: list[Path] = args.input
    output: Path | None = args.output

    values: dict[str, float] = {}
    units: dict[str, str | None] = {}

    # Read measurements
    if inputs:
        for input in inputs:
            with open(input, "r") as f:
                for line in f:
                    add_measurement(values, units, json.loads(line))
    else:
        for line in sys.stdin:
            add_measurement(values, units, json.loads(line))

    # Write measurements, sorted by metric name for stable output
    if output:
        with open(output, "w") as f:
            for metric in sorted(values):
                f.write(f"{json.dumps(format_measurement(values, units, metric))}\n")
    else:
        for metric in sorted(values):
            print(json.dumps(format_measurement(values, units, metric)))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1 @@
134

View File

@@ -0,0 +1 @@
134

Some files were not shown because too many files have changed in this diff Show More