mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-12 03:54:06 +00:00
llama : tokenizer fixes (#2549)
* Merge tokenizer fixes into the gguf branch. * Add test vocabularies
This commit is contained in:
@@ -633,17 +633,6 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
|
||||
return "The";
|
||||
}
|
||||
|
||||
// TODO: not great allocating this every time
|
||||
std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
|
||||
// initialize to prompt numer of chars, since n_tokens <= n_prompt_chars
|
||||
std::vector<llama_token> res(text.size() + (int) add_bos);
|
||||
const int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
|
||||
assert(n >= 0);
|
||||
res.resize(n);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) {
|
||||
auto lparams = llama_context_default_params();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user