refactor: lake: download arts by default in cache get (#12927)

This PR changes `lake cache get` to download artifacts by default.
Artifacts can be downloaded on demand with the new `--mappings-only`
option (`--download-arts` is now obsolete).

In the future, the plan is to have Lake download mappings when cloning
dependencies. Then, `lake cache get` will primarily be used to download
artifacts eagerly. Thus, it makes sense to have that as the default.
This commit is contained in:
Mac Malone
2026-03-15 22:29:44 -04:00
committed by GitHub
parent 274997420a
commit ea8fca2d9f
3 changed files with 24 additions and 15 deletions

View File

@@ -377,7 +377,7 @@ OPTIONS:
--platform=<target-triple> with Reservoir or --repo, sets the platform
--toolchain=<name> with Reservoir or --repo, sets the toolchain
--scope=<remote-scope> scope for a custom endpoint
--download-arts download artifacts now, not on demand
--mappings-only only download mappings, delay artifacts
--force-download redownload existing files
Downloads build outputs for packages in the workspace from a remote cache
@@ -408,10 +408,9 @@ artifacts. If no mappings are found, Lake will backtrack the Git history up to
`--max-revs`, looking for a revision with mappings. If `--max-revs` is 0, Lake
will search the repository's entire history (or as far as Git will allow).
With a named service and without a mappings file, Lake will only download
the input-to-output mappings for packages. It will delay downloading of the
corresponding artifacts to the next `lake build` that requires them. Using
`--download-arts` will force Lake to download all artifacts eagerly.
By default, Lake will download both the input-to-output mappings and the
output artifacts for a package. By using `--mappings-onlys`, Lake will only
download the mappings abd delay downloading artifacts until they are needed.
If a download for an artifact fails or the download process for a whole
package fails, Lake will report this and continue on to the next. Once done,

View File

@@ -64,7 +64,7 @@ public structure LakeOptions where
offline : Bool := false
outputsFile? : Option FilePath := none
forceDownload : Bool := false
downloadArts : Bool := false
mappingsOnly : Bool := false
service? : Option String := none
scope? : Option CacheServiceScope := none
platform? : Option CachePlatform := none
@@ -249,7 +249,8 @@ def lakeLongOption : (opt : String) → CliM PUnit
| "--wfail" => modifyThe LakeOptions ({· with failLv := .warning})
| "--iofail" => modifyThe LakeOptions ({· with failLv := .info})
| "--force-download" => modifyThe LakeOptions ({· with forceDownload := true})
| "--download-arts" => modifyThe LakeOptions ({· with downloadArts := true})
| "--download-arts" => modifyThe LakeOptions ({· with mappingsOnly := false})
| "--mappings-only" => modifyThe LakeOptions ({· with mappingsOnly := true})
| "--service" => do
let service takeOptArg "--service" "service name"
modifyThe LakeOptions ({· with service? := some service})
@@ -409,6 +410,8 @@ protected def get : CliM PUnit := do
let ws loadWorkspace cfg
let cache := ws.lakeCache
if let some file := mappings? then liftM (m := LoggerIO) do
if opts.mappingsOnly then
error "`--mappings-only` is not supported with a mappings file; use `lake cache add` instead"
if opts.platform?.isSome || opts.toolchain?.isSome then
logWarning "the `--platform` and `--toolchain` options do nothing for `cache get` with a mappings file"
if opts.failLv .warning then
@@ -441,6 +444,9 @@ protected def get : CliM PUnit := do
match ws.lakeEnv.cacheArtifactEndpoint?, ws.lakeEnv.cacheRevisionEndpoint? with
| some artifactEndpoint, some revisionEndpoint =>
logWarning endpointDeprecation
if opts.mappingsOnly then
error "`--mappings-only` requires services to be configured
via the Lake system configuration (not enviroment variables)"
return .downloadService artifactEndpoint revisionEndpoint ws.lakeEnv.cacheService?
| none, none =>
return ws.defaultCacheService
@@ -469,7 +475,7 @@ protected def get : CliM PUnit := do
else
findOutputs cache service pkg remoteScope opts platform toolchain
cache.writeMap pkg.cacheScope map service.name? (some remoteScope)
if opts.downloadArts || service.name?.isNone then
unless opts.mappingsOnly do
let descrs map.collectOutputDescrs
service.downloadArtifacts descrs cache remoteScope opts.forceDownload
else if service.isReservoir then
@@ -483,7 +489,7 @@ protected def get : CliM PUnit := do
try
let map findOutputs cache service pkg remoteScope opts platform toolchain
cache.writeMap pkg.cacheScope map service.name? (some remoteScope)
if opts.downloadArts || service.name?.isNone then
unless opts.mappingsOnly do
let descrs map.collectOutputDescrs
service.downloadArtifacts descrs cache remoteScope opts.forceDownload
return ok

View File

@@ -58,6 +58,10 @@ echo "# TESTS"
test_err "must contain exactly one '/'" cache get --repo='invalid'
test_err 'invalid characters in repository name' cache get --repo='!/invalid'
# Test `--mappings-only` validation
test_err '`--mappings-only` is not supported' cache get --mappings-only bogus.jsonl
with_cdn_endpoints test_err '`--mappings-only` requires services' cache get --mappings-only
# Test `--service` validation
test_err 'service `bogus` not found in system configuration' \
cache get --service='bogus'
@@ -131,21 +135,21 @@ LAKE_CONFIG=services.toml test_run build +Test --no-build -v
with_cdn_endpoints test_not_out "downloading" \
cache get .lake/outputs.jsonl --scope='!/test'
with_cdn_endpoints test_not_out "downloading artifact" \
cache get --download-arts --scope='!/test'
cache get --scope='!/test'
with_cdn_endpoints test_not_out "downloading" \
cache get --download-arts --scope='!/test'
cache get --scope='!/test'
# Test that the revision cache directory for the package is properly created
test_exp -d $LAKE_CACHE_DIR/revisions/test
# Test cache get with custom endpoint using `--force-download`
with_cdn_endpoints test_out "downloading" \
cache get --scope='!/test' --force-download --download-arts
cache get --scope='!/test' --force-download
test_run build +Test --no-build
# Test download service configuration through system configuration
LAKE_CONFIG=services.toml test_out "downloading" \
cache get --scope='!/test' --force-download --download-arts --service=cdn
cache get --scope='!/test' --force-download --service=cdn
test_run build +Test --no-build
# Test cache put/get with a set platform/toolchain
@@ -189,13 +193,13 @@ test_cmd rm -rf .lake/packages/Cli/.lake/build "$LAKE_CACHE_DIR"
test_fails -f reservoir.toml build @Cli --no-build
test_err "failed to download artifacts for some dependencies" \
-f reservoir2.toml cache get --max-revs=1
test_run -f reservoir.toml cache get --max-revs=1
test_run -f reservoir.toml cache get --max-revs=1 --mappings-only
test_run -f reservoir.toml build @Cli --no-build # downloads arts
# Test Reservoir with `--repo` uses GitHub scope
test_cmd rm -rf .lake/packages/Cli/.lake/build "$LAKE_CACHE_DIR"
test_run -d .lake/packages/Cli cache get --repo=leanprover/lean4-cli --max-revs=1
test_run -d .lake/packages/Cli build --no-build # downloads arts
test_run -d .lake/packages/Cli build --no-build
# Cleanup
rm -rf .git produced.out