Compare commits

...

2 Commits

Author SHA1 Message Date
Leonardo de Moura
f9ba57ea3d chore: add TODO comment 2024-07-16 16:59:45 -07:00
Leonardo de Moura
8558fd60f6 perf: add ShareCommon.shareCommon'
A more efficient (but less flexible) `shareCommon` API.
2024-07-16 16:36:35 -07:00
5 changed files with 211 additions and 3 deletions

View File

@@ -102,3 +102,11 @@ instance ShareCommonT.monadShareCommon [Monad m] : MonadShareCommon (ShareCommon
/-- Runs `x` by calling `run'` on it with the `default` initial state, yielding a computation in `m`. -/
@[inline] def ShareCommonT.run [Monad m] (x : ShareCommonT σ m α) : m α := x.run' default
/-- Runs `x` by delegating to `ShareCommonT.run` and returns the resulting value. -/
@[inline] def ShareCommonM.run (x : ShareCommonM σ α) : α := ShareCommonT.run x
/--
A more restrictive but efficient max sharing primitive.

Remark: it optimizes the number of RC operations, and the strategy for caching results.

The Lean-level definition is the identity function; the `extern` attribute binds it to
the external symbol `lean_sharecommon_quick`.
-/
@[extern "lean_sharecommon_quick"]
def ShareCommon.shareCommon' (a : α) : α := a

View File

@@ -923,6 +923,7 @@ where
trace[Elab.definition] "{preDef.declName} : {preDef.type} :=\n{preDef.value}"
let preDefs withLevelNames allUserLevelNames <| levelMVarToParamPreDecls preDefs
let preDefs instantiateMVarsAtPreDecls preDefs
let preDefs shareCommonPreDefs preDefs
let preDefs fixLevelParams preDefs scopeLevelNames allUserLevelNames
for preDef in preDefs do
trace[Elab.definition] "after eraseAuxDiscr, {preDef.declName} : {preDef.type} :=\n{preDef.value}"

View File

@@ -4,6 +4,7 @@ Released under Apache 2.0 license as described in the file LICENSE.
Authors: Leonardo de Moura
-/
prelude
import Init.ShareCommon
import Lean.Compiler.NoncomputableAttr
import Lean.Util.CollectLevelParams
import Lean.Meta.AbstractNestedProofs
@@ -211,4 +212,15 @@ def checkCodomainsLevel (preDefs : Array PreDefinition) : MetaM Unit := do
m!"for `{preDefs[0]!.declName}` is{indentExpr type₀} : {← inferType type₀}\n" ++
m!"and for `{preDefs[i]!.declName}` is{indentExpr typeᵢ} : {← inferType typeᵢ}"
/--
Maximally shares the types and values of all `preDefs` in a single pass.

The `type` and `value` of every pre-definition are pushed (in that order) into one array,
`ShareCommon.shareCommon'` is applied once to that array, and the pre-definitions are then
rebuilt from the result. The whole step is wrapped in `profileitM` under the label
"share common exprs".
-/
def shareCommonPreDefs (preDefs : Array PreDefinition) : CoreM (Array PreDefinition) := do
  profileitM Exception "share common exprs" (← getOptions) do
    -- Flatten: entry `2*i` is the type of `preDefs[i]`, entry `2*i+1` is its value.
    let mut es := #[]
    for preDef in preDefs do
      es := es.push preDef.type |>.push preDef.value
    -- One call over the whole array, so sharing is established across all pre-definitions.
    es := ShareCommon.shareCommon' es
    let mut result := #[]
    for h : i in [:preDefs.size] do
      let preDef := preDefs[i]
      result := result.push { preDef with type := es[2*i]!, value := es[2*i+1]! }
    return result
end Lean.Elab

View File

@@ -448,9 +448,6 @@ static inline void lean_inc(lean_object * o) { if (!lean_is_scalar(o)) lean_inc_
/* Forwards to `lean_inc_ref_n(o, n)` unless `o` is a scalar, in which case nothing happens. */
static inline void lean_inc_n(lean_object * o, size_t n) {
    if (lean_is_scalar(o))
        return;
    lean_inc_ref_n(o, n);
}
/* Forwards to `lean_dec_ref(o)` unless `o` is a scalar, in which case nothing happens. */
static inline void lean_dec(lean_object * o) {
    if (lean_is_scalar(o))
        return;
    lean_dec_ref(o);
}
/* Just free memory */
LEAN_EXPORT void lean_dealloc(lean_object * o);
/* True when the tag of `o` is at most `LeanMaxCtorTag`. */
static inline bool lean_is_ctor(lean_object * o) { return lean_ptr_tag(o) <= LeanMaxCtorTag; }
/* True when the tag of `o` equals `LeanClosure`. */
static inline bool lean_is_closure(lean_object * o) { return lean_ptr_tag(o) == LeanClosure; }
/* True when the tag of `o` equals `LeanArray`. */
static inline bool lean_is_array(lean_object * o) { return lean_ptr_tag(o) == LeanArray; }

View File

@@ -6,6 +6,8 @@ Author: Leonardo de Moura
*/
#include <vector>
#include <cstring>
#include <unordered_map>
#include <unordered_set>
#include "runtime/object.h"
#include "runtime/hash.h"
@@ -268,4 +270,192 @@ public:
/* Exported entry point: builds a `sharecommon_fn` from `tc` and `s` and applies it to `a`. */
extern "C" LEAN_EXPORT obj_res lean_state_sharecommon(b_obj_arg tc, obj_arg s, obj_arg a) {
    sharecommon_fn share(tc, s);
    return share(a);
}
/*
A faster version of `sharecommon_fn` which only uses a local state.
It optimizes the number of RC operations, the strategy for caching results,
and uses C++ hashmap.
*/
class sharecommon_quick_fn {
    /* Hash functor for `m_set`; delegates to `lean_sharecommon_hash`. */
    struct set_hash {
        std::size_t operator()(lean_object * o) const { return lean_sharecommon_hash(o); }
    };

    /* Equality functor for `m_set`; delegates to `lean_sharecommon_eq`. */
    struct set_eq {
        // An equality predicate yields a truth value, so the result type is `bool`.
        bool operator()(lean_object * o1, lean_object * o2) const { return lean_sharecommon_eq(o1, o2); }
    };

    /*
    We use `m_cache` to ensure we do **not** traverse a DAG as a tree.
    We use pointer equality for this collection.
    */
    std::unordered_map<lean_object *, lean_object *> m_cache;
    /* Set of maximally shared terms. AKA hash-consing table. */
    std::unordered_set<lean_object *, set_hash, set_eq> m_set;

    /*
    We do not increment reference counters when inserting Lean objects at `m_cache` and `m_set`.
    This is correct because
    - The domain of `m_cache` contains only sub-objects of `lean_sharecommon_quick` parameter,
      and we know the object referenced by this parameter will remain alive.
    - The range of `m_cache` contains only new objects that have been maxed shared, and these
      objects will be sub-objects of the object returned by `lean_sharecommon_quick`.
    - `m_set` is like the range of `m_cache`.
    */

    /*
    Looks up `a` in `m_cache`.
    Returns the cached result with its reference counter incremented, or `nullptr` when
    `a` is exclusive (never cached) or has not been visited yet.
    */
    lean_object * check_cache(lean_object * a) {
        if (!lean_is_exclusive(a)) {
            // We only check the cache if `a` is a shared object
            auto it = m_cache.find(a);
            if (it != m_cache.end()) {
                // All objects stored in the range of `m_cache` are single threaded.
                lean_assert(lean_is_st(it->second));
                // We increment the reference counter because this object
                // will be returned by `lean_sharecommon_quick` or stored into a new object.
                it->second->m_rc++;
                return it->second;
            }
        }
        return nullptr;
    }

    /*
    `new_a` is a new object that is equal to `a`, but its subobjects are maximally shared.

    If `m_set` already holds an object equal to `new_a`, `new_a` is released and the existing
    object is returned with its reference counter incremented; otherwise `new_a` is inserted
    into `m_set` and returned. When `a` is not exclusive, the mapping `a -> result` is also
    recorded in `m_cache`.
    */
    lean_object * save(lean_object * a, lean_object * new_a) {
        lean_assert(lean_is_st(new_a));
        lean_assert(new_a->m_rc == 1);
        auto it = m_set.find(new_a);
        lean_object * result;
        if (it == m_set.end()) {
            // `new_a` is a new object
            m_set.insert(new_a);
            result = new_a;
        } else {
            // We already have a maximally shared object that is equal to `new_a`
            result = *it;
            DEBUG_CODE({
                if (lean_is_ctor(new_a)) {
                    lean_assert(lean_is_ctor(result));
                    unsigned num_objs = lean_ctor_num_objs(new_a);
                    lean_assert(lean_ctor_num_objs(result) == num_objs);
                    for (unsigned i = 0; i < num_objs; i++) {
                        lean_assert(lean_ctor_get(result, i) == lean_ctor_get(new_a, i));
                    }
                }
            });
            lean_dec_ref(new_a); // delete `new_a`
            // All objects in `m_set` are single threaded.
            lean_assert(lean_is_st(result));
            result->m_rc++;
            lean_assert(result->m_rc > 1);
        }
        if (!lean_is_exclusive(a)) {
            // We only cache the result if `a` is a shared object.
            m_cache.insert(std::make_pair(a, result));
        }
        lean_assert(result == new_a || result->m_rc > 1);
        lean_assert(result != new_a || result->m_rc == 1);
        return result;
    }

    /* Scalar arrays have no sub-objects: copy the payload bytes and hash-cons the copy. */
    lean_object * visit_sarray(lean_object * a) {
        lean_object * r = check_cache(a);
        if (r != nullptr) { lean_assert(r->m_rc > 1); return r; }
        size_t sz = lean_sarray_size(a);
        unsigned elem_sz = lean_sarray_elem_size(a);
        lean_sarray_object * new_a = (lean_sarray_object*)lean_alloc_sarray(elem_sz, sz, sz);
        memcpy(new_a->m_data, lean_to_sarray(a)->m_data, elem_sz*sz);
        return save(a, (lean_object*)new_a);
    }

    /* Strings have no sub-objects: copy the bytes into a fresh string and hash-cons the copy. */
    lean_object * visit_string(lean_object * a) {
        lean_object * r = check_cache(a);
        if (r != nullptr) { lean_assert(r->m_rc > 1); return r; }
        size_t sz = lean_string_size(a);
        size_t len = lean_string_len(a);
        lean_string_object * new_a = (lean_string_object*)lean_alloc_string(sz, sz, len);
        lean_set_st_header((lean_object*)new_a, LeanString, 0);
        new_a->m_size = sz;
        new_a->m_capacity = sz;
        new_a->m_length = len;
        memcpy(new_a->m_data, lean_to_string(a)->m_data, sz);
        return save(a, (lean_object*)new_a);
    }

    /* Builds a new array whose elements are the visited elements of `a`, then hash-conses it. */
    lean_object * visit_array(lean_object * a) {
        lean_object * r = check_cache(a);
        if (r != nullptr) { lean_assert(r->m_rc > 1); return r; }
        size_t sz = array_size(a);
        lean_array_object * new_a = (lean_array_object*)lean_alloc_array(sz, sz);
        for (size_t i = 0; i < sz; i++) {
            lean_array_set_core((lean_object*)new_a, i, visit(lean_array_get_core(a, i)));
        }
        return save(a, (lean_object*)new_a);
    }

    /*
    Builds a new constructor object with the same tag as `a`, whose object fields are the
    visited fields of `a` and whose scalar area is a byte copy of `a`'s, then hash-conses it.
    */
    lean_object * visit_ctor(lean_object * a) {
        lean_object * r = check_cache(a);
        if (r != nullptr) { lean_assert(r->m_rc > 1); return r; }
        unsigned num_objs = lean_ctor_num_objs(a);
        unsigned tag = lean_ptr_tag(a);
        unsigned sz = lean_object_byte_size(a);
        // The scalar area starts right after the header and the object-field pointers.
        unsigned scalar_offset = sizeof(lean_object) + num_objs*sizeof(void*);
        unsigned scalar_sz = sz - scalar_offset;
        lean_object * new_a = lean_alloc_ctor(tag, num_objs, scalar_sz);
        for (unsigned i = 0; i < num_objs; i++) {
            lean_ctor_set(new_a, i, visit(lean_ctor_get(a, i)));
        }
        if (scalar_sz > 0) {
            memcpy(reinterpret_cast<char*>(new_a) + scalar_offset, reinterpret_cast<char*>(a) + scalar_offset, scalar_sz);
        }
        return save(a, new_a);
    }

public:
    /*
    **TODO:** We did not implement stack overflow detection.
    We claim it is not needed in the current uses of `shareCommon'`.
    If this becomes an issue, we can use the following approach to address the issue without
    affecting the performance.
    - Add an extra `depth` parameter.
    - In `operator()`, estimate the maximum depth based on the remaining stack space. See `check_stack`.
    - If the limit is reached, simply return `a`.
    */

    /*
    Returns a maximally shared version of `a`.
    `a` itself is only read; the returned object carries a reference for the caller.
    */
    lean_object * visit(lean_object * a) {
        if (lean_is_scalar(a)) {
            return a;
        }
        switch (lean_ptr_tag(a)) {
        /*
        Similarly to `sharecommon_fn`, we only maximally share arrays, scalar arrays, strings, and
        constructor objects.
        */
        case LeanMPZ: lean_inc_ref(a); return a;
        case LeanClosure: lean_inc_ref(a); return a;
        case LeanThunk: lean_inc_ref(a); return a;
        case LeanTask: lean_inc_ref(a); return a;
        case LeanRef: lean_inc_ref(a); return a;
        case LeanExternal: lean_inc_ref(a); return a;
        case LeanReserved: lean_inc_ref(a); return a;
        case LeanArray: return visit_array(a);
        case LeanScalarArray: return visit_sarray(a);
        case LeanString: return visit_string(a);
        default: return visit_ctor(a);
        }
    }

    /* Function-call entry point; equivalent to `visit(a)`. */
    lean_object * operator()(lean_object * a) {
        return visit(a);
    }
};
// def ShareCommon.shareCommon' (a : A) : A := a
/*
Exported entry point for `ShareCommon.shareCommon'`.

`a` is received as an owned argument (`obj_arg`), but the traversal performed by
`sharecommon_quick_fn` only reads it and hands back a separately referenced result.
The reference received for `a` must therefore be released here, otherwise it is leaked
on every call.
*/
extern "C" LEAN_EXPORT obj_res lean_sharecommon_quick(obj_arg a) {
    // The temporary functor (and its pointer-keyed tables) is destroyed at the end of this
    // statement, i.e. before `a` is released below.
    lean_object * r = sharecommon_quick_fn()(a);
    // Consume the owned argument. `r` holds its own references to anything it shares with `a`.
    lean_dec(a);
    return r;
}
};