naming : improve consistency

This commit is contained in:
Georgi Gerganov
2026-05-08 12:24:57 +03:00
parent 778f9e247e
commit efa2f8e5a7
5 changed files with 201 additions and 201 deletions

View File

@@ -1981,7 +1981,7 @@ size_t server_prompt_cache::n_tokens() const {
return res;
}
server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t state_size_main, size_t state_size_drft) {
server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t state_size_tgt, size_t state_size_dft) {
// first check if the current state is contained fully in the cache
for (auto it = states.begin(); it != states.end(); ++it) {
const int cur_lcp_len = it->tokens.get_common_prefix(prompt.tokens);
@@ -2005,13 +2005,13 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t
}
}
std::vector<uint8_t> state_data_main;
std::vector<uint8_t> state_data_drft;
std::vector<uint8_t> state_data_tgt;
std::vector<uint8_t> state_data_dft;
// check if we can allocate enough memory for the new state
try {
state_data_main.resize(state_size_main);
state_data_drft.resize(state_size_drft);
state_data_tgt.resize(state_size_tgt);
state_data_dft.resize(state_size_dft);
} catch (const std::bad_alloc & e) {
SRV_ERR("failed to allocate memory for prompt cache state: %s\n", e.what());
@@ -2027,8 +2027,8 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t
states.push_back({
/*.tokens =*/ prompt.tokens.clone(),
/*.data =*/ {
/*.main =*/ std::move(state_data_main),
/*.drft =*/ std::move(state_data_drft),
/*.main =*/ std::move(state_data_tgt),
/*.drft =*/ std::move(state_data_dft),
},
/*.checkpoints =*/ prompt.checkpoints,
});
@@ -2036,7 +2036,7 @@ server_prompt * server_prompt_cache::alloc(const server_prompt & prompt, size_t
return &states.back();
}
bool server_prompt_cache::load(server_prompt & prompt, const server_tokens & tokens_new, llama_context * ctx_main, llama_context * ctx_drft, int32_t id_slot) {
bool server_prompt_cache::load(server_prompt & prompt, const server_tokens & tokens_new, llama_context * ctx_tgt, llama_context * ctx_dft, int32_t id_slot) {
const int lcp_best = prompt.tokens.get_common_prefix(tokens_new);
float f_keep_best = prompt.tokens.size() > 0 ? float(lcp_best) / prompt.tokens.size() : -1.0f; // empty slot: any cache entry wins
@@ -2073,7 +2073,7 @@ bool server_prompt_cache::load(server_prompt & prompt, const server_tokens & tok
auto & data = it_best->data.main;
const size_t size = data.size();
const size_t n = llama_state_seq_set_data_ext(ctx_main, data.data(), size, id_slot, 0);
const size_t n = llama_state_seq_set_data_ext(ctx_tgt, data.data(), size, id_slot, 0);
if (n != size) {
SRV_WRN("failed to restore state with size %zu\n", size);
@@ -2088,10 +2088,10 @@ bool server_prompt_cache::load(server_prompt & prompt, const server_tokens & tok
auto & data = it_best->data.drft;
if (!data.empty()) {
GGML_ASSERT(ctx_drft);
GGML_ASSERT(ctx_dft);
const size_t size = data.size();
const size_t n = llama_state_seq_set_data_ext(ctx_drft, data.data(), size, id_slot, 0);
const size_t n = llama_state_seq_set_data_ext(ctx_dft, data.data(), size, id_slot, 0);
if (n != size) {
SRV_WRN("failed to restore state with size %zu\n", size);