mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-03-17 16:44:07 +00:00
Autoparser - complete refactoring of parser architecture (#18675)
* Autoparser - full single commit squash * Final pre-merge changes: minor fixes, Kimi 2.5 model parser
This commit is contained in:
committed by
GitHub
parent
34df42f7be
commit
566059a26b
@@ -26,6 +26,7 @@ else()
|
||||
add_subdirectory(server)
|
||||
endif()
|
||||
add_subdirectory(tokenize)
|
||||
add_subdirectory(parser)
|
||||
add_subdirectory(tts)
|
||||
add_subdirectory(mtmd)
|
||||
if (GGML_RPC)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
#include "chat.h"
|
||||
#include "common.h"
|
||||
#include "arg.h"
|
||||
#include "console.h"
|
||||
@@ -191,7 +192,8 @@ struct cli_context {
|
||||
inputs.use_jinja = chat_params.use_jinja;
|
||||
inputs.parallel_tool_calls = false;
|
||||
inputs.add_generation_prompt = true;
|
||||
inputs.enable_thinking = chat_params.enable_thinking;
|
||||
inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
inputs.enable_thinking = common_chat_templates_support_enable_thinking(chat_params.tmpls.get());
|
||||
|
||||
// Apply chat template to the list of messages
|
||||
return common_chat_templates_apply(chat_params.tmpls.get(), inputs);
|
||||
|
||||
20
tools/parser/CMakeLists.txt
Normal file
20
tools/parser/CMakeLists.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
if (NOT WIN32 OR NOT BUILD_SHARED_LIBS)
    # this tool is disabled on Windows when building with shared libraries because it uses internal functions not exported with LLAMA_API
    set(TARGET llama-debug-template-parser)
    add_executable(${TARGET} debug-template-parser.cpp)
    target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
    target_compile_features(${TARGET} PRIVATE cxx_std_17)

    # install only when tool installation is enabled (same gate as the other tools)
    if(LLAMA_TOOLS_INSTALL)
        install(TARGETS ${TARGET} RUNTIME)
    endif()
endif()

# template-analysis is built unconditionally (not inside the WIN32/shared guard above)
set(TARGET llama-template-analysis)
add_executable(${TARGET} template-analysis.cpp)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

if(LLAMA_TOOLS_INSTALL)
    install(TARGETS ${TARGET} RUNTIME)
endif()
|
||||
452
tools/parser/debug-template-parser.cpp
Normal file
452
tools/parser/debug-template-parser.cpp
Normal file
@@ -0,0 +1,452 @@
|
||||
#include "../src/llama-grammar.h"
#include "chat-auto-parser.h"
#include "chat.h"
#include "common.h"
#include "gguf.h"
#include "jinja/runtime.h"
#include "log.h"
#include "peg-parser.h"

#include <fstream>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>

#include "nlohmann/json.hpp"
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
// What the tool prints: the template analysis, the rendered template
// scenarios, or both.
// NOTE(review): the original "(default)" tag on ANALYSIS looked stale —
// debug_options::mode defaults to BOTH.
enum class output_mode {
    ANALYSIS, // Only output analysis results
    TEMPLATE, // Only output rendered template
    BOTH      // Output both (default in debug_options)
};
|
||||
|
||||
// Which assistant-message scenario(s) to render through the template
// (selected with --input-message=TYPE).
enum class input_message_type {
    NONE,                   // Don't render any message scenarios (only analysis)
    CONTENT_ONLY,           // Simple assistant message with content
    REASONING_CONTENT,      // Message with reasoning_content + content
    TOOL_CALL_ONLY,         // Message with tool_calls only
    CONTENT_TOOL_CALL,      // Message with content + tool_calls
    REASONING_TOOL_CALL,    // Message with reasoning_content + tool_calls
    CONTENT_FAKE_TOOL_CALL, // Message with content but no actual tool_calls (for testing)
    ALL                     // Render all scenarios
};
|
||||
|
||||
// Parsed command-line configuration for the debug tool.
struct debug_options {
    std::string template_path;        // first positional arg: .jinja file or .gguf model
    bool with_tools = true;           // include tool definitions (--no-tools clears)
    bool generation_prompt = true;    // add_generation_prompt (--generation-prompt=0|1)
    bool enable_reasoning = true;     // reasoning parsing (--enable-reasoning=0|1)
    bool debug_jinja = false;         // fine-grained Jinja debug (--debug-jinja)
    bool force_tool_call = false;     // force tool choice to REQUIRED (--force-tool-call)
    output_mode mode = output_mode::BOTH;                        // --output=
    input_message_type input_message = input_message_type::NONE; // --input-message=
};
|
||||
|
||||
// Slurp an entire file into a string (binary mode: no newline translation).
// Throws std::runtime_error when the file cannot be opened.
static std::string read_file(const std::string & path) {
    std::ifstream stream(path, std::ios::binary);
    if (!stream.is_open()) {
        throw std::runtime_error("Could not open file: " + path);
    }
    std::ostringstream contents;
    contents << stream.rdbuf();
    return contents.str();
}
|
||||
|
||||
static std::string read_gguf_chat_template(const std::string & path) {
|
||||
struct gguf_init_params params = { /*no_alloc =*/true, // We only need metadata, not tensor data
|
||||
/*ctx=*/nullptr };
|
||||
|
||||
struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params);
|
||||
if (ctx == nullptr) {
|
||||
throw std::runtime_error("Could not open GGUF file: " + path);
|
||||
}
|
||||
|
||||
const char * key = "tokenizer.chat_template";
|
||||
int64_t key_id = gguf_find_key(ctx, key);
|
||||
|
||||
if (key_id == -1) {
|
||||
gguf_free(ctx);
|
||||
throw std::runtime_error("GGUF file does not contain chat template key: " + std::string(key));
|
||||
}
|
||||
|
||||
const char * template_str = gguf_get_val_str(ctx, key_id);
|
||||
if (template_str == nullptr) {
|
||||
gguf_free(ctx);
|
||||
throw std::runtime_error("GGUF file contains chat template key but value is null");
|
||||
}
|
||||
|
||||
std::string result = template_str;
|
||||
gguf_free(ctx);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Print command-line usage for the debug tool via LOG_ERR.
static void print_usage(const char * program_name) {
    LOG_ERR("Usage: %s <template_or_gguf_path> [options]\n", program_name);
    LOG_ERR("\nOptions:\n");
    LOG_ERR("  --no-tools Disable tool definitions\n");
    LOG_ERR("  --force-tool-call Set tool calls to forced\n");
    LOG_ERR("  --generation-prompt=0|1 Set add_generation_prompt (default: 1)\n");
    LOG_ERR("  --enable-reasoning=0|1 Enable reasoning parsing (default: 1)\n");
    LOG_ERR("  --output=MODE Output mode: analysis, template, both (default: both)\n");
    LOG_ERR("  --debug-jinja Enable Jinja fine-grained debug\n");
    LOG_ERR("  --input-message=TYPE Message type to render:\n");
    LOG_ERR("  content_only, reasoning_content, tool_call_only,\n");
    LOG_ERR("  content_tool_call, reasoning_tool_call,\n");
    LOG_ERR("  content_fake_tool_call, all\n");
    LOG_ERR("\nExamples:\n");
    LOG_ERR("  %s template.jinja --input-message=all --generation-prompt=1\n", program_name);
    LOG_ERR("  %s template.jinja --output=template --input-message=tool_call_only\n", program_name);
}
|
||||
|
||||
// Interpret a command-line option value as a boolean.
// Exactly "1", "true" and "yes" are truthy; everything else (including
// uppercase variants and the empty string) is false.
static bool parse_bool_option(const std::string & value) {
    static const char * truthy[] = { "1", "true", "yes" };
    for (const char * candidate : truthy) {
        if (value == candidate) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
static bool parse_options(int argc, char ** argv, debug_options & opts) {
|
||||
if (argc < 2) {
|
||||
print_usage(argv[0]);
|
||||
return false;
|
||||
}
|
||||
|
||||
opts.template_path = argv[1];
|
||||
|
||||
for (int i = 2; i < argc; ++i) {
|
||||
std::string arg = argv[i];
|
||||
|
||||
if (arg == "--force-tool-call") {
|
||||
opts.force_tool_call = true;
|
||||
} else if (arg == "--debug-jinja") {
|
||||
opts.debug_jinja = true;
|
||||
} else if (arg == "--no-tools") {
|
||||
opts.with_tools = false;
|
||||
} else if (arg.rfind("--generation-prompt=", 0) == 0) {
|
||||
opts.generation_prompt = parse_bool_option(arg.substr(20));
|
||||
} else if (arg.rfind("--enable-reasoning=", 0) == 0) {
|
||||
opts.enable_reasoning = parse_bool_option(arg.substr(19));
|
||||
} else if (arg.rfind("--output=", 0) == 0) {
|
||||
std::string mode = arg.substr(9);
|
||||
if (mode == "analysis") {
|
||||
opts.mode = output_mode::ANALYSIS;
|
||||
} else if (mode == "template") {
|
||||
opts.mode = output_mode::TEMPLATE;
|
||||
} else if (mode == "both") {
|
||||
opts.mode = output_mode::BOTH;
|
||||
} else {
|
||||
LOG_ERR("Unknown output mode: %s\n", mode.c_str());
|
||||
return false;
|
||||
}
|
||||
} else if (arg.rfind("--input-message=", 0) == 0) {
|
||||
std::string type = arg.substr(16);
|
||||
if (type == "content_only") {
|
||||
opts.input_message = input_message_type::CONTENT_ONLY;
|
||||
} else if (type == "reasoning_content") {
|
||||
opts.input_message = input_message_type::REASONING_CONTENT;
|
||||
} else if (type == "tool_call_only") {
|
||||
opts.input_message = input_message_type::TOOL_CALL_ONLY;
|
||||
} else if (type == "content_tool_call") {
|
||||
opts.input_message = input_message_type::CONTENT_TOOL_CALL;
|
||||
} else if (type == "reasoning_tool_call") {
|
||||
opts.input_message = input_message_type::REASONING_TOOL_CALL;
|
||||
} else if (type == "content_fake_tool_call") {
|
||||
opts.input_message = input_message_type::CONTENT_FAKE_TOOL_CALL;
|
||||
} else if (type == "all") {
|
||||
opts.input_message = input_message_type::ALL;
|
||||
} else {
|
||||
LOG_ERR("Unknown input message type: %s\n", type.c_str());
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
LOG_ERR("Unknown option: %s\n", arg.c_str());
|
||||
print_usage(argv[0]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Fixed user turn used as the opening message of every rendering scenario.
static json build_user_message() {
    json msg;
    msg["role"]    = "user";
    msg["content"] = "Hello, please help me with a task.";
    return msg;
}
|
||||
|
||||
// Assistant turn carrying plain text content only.
static json build_content_only_message() {
    json msg;
    msg["role"]    = "assistant";
    msg["content"] = "Hello! I'm here to help you with your task.";
    return msg;
}
|
||||
|
||||
// Assistant turn carrying both visible content and reasoning_content.
static json build_reasoning_content_message() {
    json msg;
    msg["role"]              = "assistant";
    msg["content"]           = "Hello! I'm here to help you with your task.";
    msg["reasoning_content"] = "The user is greeting me and asking for help. I should respond politely.";
    return msg;
}
|
||||
|
||||
// Assistant turn with a single tool call and null content.
static json build_tool_call_only_message() {
    json args;
    args["param1"] = "value1";
    args["param2"] = "value2";

    json fn;
    fn["name"]      = "test_function_name";
    fn["arguments"] = args;

    json call;
    call["type"]     = "function";
    call["function"] = fn;
    call["id"]       = "123456789";

    json msg;
    msg["role"]       = "assistant";
    msg["content"]    = nullptr;
    msg["tool_calls"] = json::array({ call });
    return msg;
}
|
||||
|
||||
// Assistant turn combining visible content with a tool call (no call id).
static json build_content_tool_call_message() {
    json args;
    args["param1"] = "value1";
    args["param2"] = "value2";

    json fn;
    fn["name"]      = "test_function_name";
    fn["arguments"] = args;

    json call;
    call["type"]     = "function";
    call["function"] = fn;

    json msg;
    msg["role"]       = "assistant";
    msg["content"]    = "I'll help you by calling a function.";
    msg["tool_calls"] = json::array({ call });
    return msg;
}
|
||||
|
||||
// Assistant turn combining reasoning_content with a tool call; content is null.
static json build_reasoning_tool_call_message() {
    json args;
    args["param1"] = "value1";
    args["param2"] = "value2";

    json fn;
    fn["name"]      = "test_function_name";
    fn["arguments"] = args;

    json call;
    call["type"]     = "function";
    call["function"] = fn;

    json msg;
    msg["role"]              = "assistant";
    msg["content"]           = nullptr;
    msg["reasoning_content"] = "I need to call a function to help with this task.";
    msg["tool_calls"]        = json::array({ call });
    return msg;
}
|
||||
|
||||
// Assistant turn whose text SOUNDS like a tool call but carries no
// tool_calls field. Used to test whether a template renders tool
// definitions without rendering actual tool calls.
static json build_content_fake_tool_call_message() {
    json msg;
    msg["role"]    = "assistant";
    msg["content"] = "I'll help you by calling a function.";
    return msg;
}
|
||||
|
||||
// One test tool ("test_function_name") with two string parameters;
// only param1 is marked required.
static json build_tools_definition() {
    json schema;
    schema["type"]       = "object";
    schema["properties"] = json::object();
    schema["properties"]["param1"] = json::object({
        { "type", "string" },
        { "description", "First parameter" }
    });
    schema["properties"]["param2"] = json::object({
        { "type", "string" },
        { "description", "Second parameter" }
    });
    schema["required"] = json::array({ "param1" });

    json fn;
    fn["name"]        = "test_function_name";
    fn["description"] = "A test function for debugging";
    fn["parameters"]  = schema;

    json tool;
    tool["type"]     = "function";
    tool["function"] = fn;

    return json::array({ tool });
}
|
||||
|
||||
// Render one conversation scenario through the chat template and log both
// the input messages and the rendered output. Rendering failures are caught
// and logged, not propagated — one bad scenario must not abort the rest.
static void render_scenario(const common_chat_template & tmpl,
                            const std::string & scenario_name,
                            const json & messages,
                            const json & tools,
                            bool add_generation_prompt,
                            bool enable_thinking) {
    LOG_ERR("\n=== Scenario: %s ===\n", scenario_name.c_str());
    LOG_ERR("add_generation_prompt: %s, enable_thinking: %s\n", add_generation_prompt ? "true" : "false",
            enable_thinking ? "true" : "false");

    // When add_generation_prompt is true, add a trailing user message to trigger the prompt
    // (templates typically only emit the generation prompt after a user turn).
    json final_messages = messages;
    if (add_generation_prompt && !messages.empty() && messages.back().value("role", "") == "assistant") {
        final_messages.push_back(json{
            { "role", "user" },
            { "content", "Now please continue with another response." }
        });
    }

    LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str());

    try {
        autoparser::templates_params inputs;
        inputs.messages              = final_messages;
        inputs.add_generation_prompt = add_generation_prompt;
        // enable_thinking is forwarded through the template's extra context
        inputs.extra_context["enable_thinking"] = enable_thinking;

        // Only attach tools when the caller provided a non-empty array
        if (!tools.is_null() && tools.is_array() && !tools.empty()) {
            inputs.tools = tools;
        }

        std::string output = common_chat_template_direct_apply(tmpl, inputs);

        LOG_ERR("\n--- Rendered Output ---\n");
        LOG_ERR("%s\n", output.c_str());
        LOG_ERR("--- End Output (length: %zu) ---\n", output.length());
    } catch (const std::exception & e) {
        LOG_ERR("Rendering failed: %s\n", e.what());
    }
}
|
||||
|
||||
static void render_all_scenarios(const common_chat_template & tmpl,
|
||||
const json & tools,
|
||||
bool add_generation_prompt,
|
||||
bool enable_thinking,
|
||||
input_message_type message_type) {
|
||||
json user_msg = build_user_message();
|
||||
|
||||
auto render_if = [&](input_message_type type, const std::string & name, const json & assistant_msg) {
|
||||
if (message_type == input_message_type::ALL || message_type == type) {
|
||||
json messages = json::array({ user_msg, assistant_msg });
|
||||
render_scenario(tmpl, name, messages, tools, add_generation_prompt, enable_thinking);
|
||||
}
|
||||
};
|
||||
|
||||
render_if(input_message_type::CONTENT_ONLY, "content_only", build_content_only_message());
|
||||
render_if(input_message_type::REASONING_CONTENT, "reasoning_content", build_reasoning_content_message());
|
||||
render_if(input_message_type::TOOL_CALL_ONLY, "tool_call_only", build_tool_call_only_message());
|
||||
render_if(input_message_type::CONTENT_TOOL_CALL, "content_tool_call", build_content_tool_call_message());
|
||||
render_if(input_message_type::REASONING_TOOL_CALL, "reasoning_tool_call", build_reasoning_tool_call_message());
|
||||
render_if(input_message_type::CONTENT_FAKE_TOOL_CALL, "content_fake_tool_call",
|
||||
build_content_fake_tool_call_message());
|
||||
|
||||
// Also render with add_generation_prompt=true to show the prompt ending
|
||||
if (message_type == input_message_type::ALL) {
|
||||
LOG_ERR("\n\n=== Generation Prompt Scenarios (add_generation_prompt=true) ===\n");
|
||||
|
||||
json prompt_messages = json::array({ user_msg });
|
||||
render_scenario(tmpl, "generation_prompt_only", prompt_messages, tools, true, enable_thinking);
|
||||
|
||||
// With enable_thinking toggled
|
||||
render_scenario(tmpl, "generation_prompt_thinking_disabled", prompt_messages, tools, true, false);
|
||||
}
|
||||
}
|
||||
|
||||
// Entry point: load a chat template (raw .jinja file or GGUF metadata),
// optionally render message scenarios through it, then run the autoparser
// analysis and dump the generated parser/grammar artifacts.
int main(int argc, char ** argv) {
    // Set log level to most verbose to capture all debug output
    common_log_set_verbosity_thold(99);

    debug_options opts;
    if (!parse_options(argc, argv, opts)) {
        return 1;
    }

    // Jinja debug can be enabled via flag or environment variable
    if (opts.debug_jinja || std::getenv("LLAMA_DEBUG_JINJA") != nullptr) {
        jinja::enable_debug(true);
    }

    std::string template_source;
    try {
        // Check if the file is a GGUF file (by ".gguf" suffix only)
        if (opts.template_path.size() >= 5 &&
            opts.template_path.compare(opts.template_path.size() - 5, 5, ".gguf") == 0) {
            template_source = read_gguf_chat_template(opts.template_path);
        } else {
            template_source = read_file(opts.template_path);
        }
    } catch (const std::exception & e) {
        LOG_ERR("Error reading template: %s\n", e.what());
        return 1;
    }

    LOG_ERR("Analyzing template: %s\n", opts.template_path.c_str());
    LOG_ERR("Options: with_tools=%s, generation_prompt=%s, enable_reasoning=%s\n", opts.with_tools ? "true" : "false",
            opts.generation_prompt ? "true" : "false", opts.enable_reasoning ? "true" : "false");

    try {
        // Empty bos/eos tokens: not needed for rendering/analysis here
        common_chat_template chat_template(template_source, "", "");

        // Build tools definition
        json tools = opts.with_tools ? build_tools_definition() : json();

        // Render template scenarios if requested
        if (opts.input_message != input_message_type::NONE &&
            (opts.mode == output_mode::TEMPLATE || opts.mode == output_mode::BOTH)) {
            LOG_ERR("\n");
            LOG_ERR("================================================================================\n");
            LOG_ERR(" TEMPLATE RENDERING OUTPUT\n");
            LOG_ERR("================================================================================\n");

            render_all_scenarios(chat_template, tools, opts.generation_prompt, opts.enable_reasoning,
                                 opts.input_message);
        }

        // Output analysis if requested
        if (opts.mode == output_mode::ANALYSIS || opts.mode == output_mode::BOTH) {
            LOG_ERR("\n");
            LOG_ERR("================================================================================\n");
            LOG_ERR(" TEMPLATE ANALYSIS\n");
            LOG_ERR("================================================================================\n");

            autoparser::autoparser analysis;
            analysis.analyze_template(chat_template);

            // Generate Parser: build the same params a server request would carry
            autoparser::templates_params params;
            params.messages = json::array({ build_user_message() });
            params.reasoning_format =
                opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE;
            params.enable_thinking = opts.enable_reasoning;
            params.add_generation_prompt = opts.generation_prompt;

            if (opts.with_tools) {
                params.tools = tools;
                params.tool_choice = opts.force_tool_call ? COMMON_CHAT_TOOL_CHOICE_REQUIRED : COMMON_CHAT_TOOL_CHOICE_AUTO;
            } else {
                params.tools = json();
                params.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
            }
            params.parallel_tool_calls = false;

            auto parser_data = autoparser::peg_generator::generate_parser(chat_template, params, analysis);

            // Dump the generated PEG parser tree
            LOG_ERR("\n=== Generated Parser ===\n");
            common_peg_arena arena;
            arena.load(parser_data.parser);
            LOG_ERR("%s\n", arena.dump(arena.root()).c_str());

            LOG_ERR("\n=== Generated Grammar ===\n");
            LOG_ERR("%s\n", parser_data.grammar.c_str());

            LOG_ERR("\n=== Generated Lazy Grammar ===\n");
            LOG_ERR("%d\n", parser_data.grammar_lazy);

            LOG_ERR("\n=== Generated Grammar Triggers ===\n");
            for (const common_grammar_trigger & cgt : parser_data.grammar_triggers) {
                LOG_ERR("Token: %d | Type: %d | Value: %s\n", cgt.token, cgt.type, cgt.value.c_str());
            }

            LOG_ERR("\n=== Preserved Tokens ===\n");
            for (const std::string & token : parser_data.preserved_tokens) {
                LOG_ERR(" '%s'\n", token.c_str());
            }

            // Sanity-check that the generated grammar actually parses.
            // NOTE(review): the returned grammar object is never freed —
            // harmless at end of main, but consider freeing it (e.g. via
            // llama_grammar_free_impl) for cleanliness.
            if (!parser_data.grammar.empty()) {
                LOG_ERR("\n=== Verifying created grammar ===\n");
                auto * grammar = llama_grammar_init_impl(nullptr, parser_data.grammar.c_str(), "root",
                                                         parser_data.grammar_lazy, nullptr, 0, nullptr, 0);
                if (grammar != nullptr) {
                    LOG_ERR("\n=== Grammar successfully created ===\n");
                }
            }
        }
    } catch (const std::exception & e) {
        LOG_ERR("Analysis failed: %s\n", e.what());
        return 1;
    }

    return 0;
}
|
||||
611
tools/parser/template-analysis.cpp
Normal file
611
tools/parser/template-analysis.cpp
Normal file
@@ -0,0 +1,611 @@
|
||||
#include "chat-auto-parser.h"
|
||||
#include "chat-auto-parser-helpers.h"
|
||||
#include "chat.h"
|
||||
#include "log.h"
|
||||
#include "jinja/caps.h"
|
||||
#include "jinja/runtime.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include "nlohmann/json.hpp"
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
// ANSI color codes - using 256-color palette for brighter colors (all bold)
// NOTE(review): these escape sequences assume a 256-color-capable terminal;
// there is no plain-terminal/non-TTY fallback.
#define ANSI_RESET "\033[0m"
#define ANSI_PURPLE "\033[1m\x1b[38;5;126m" // Bold bright purple for main headers
#define ANSI_CYAN "\033[1m\x1b[38;5;81m" // Bold bright cyan for section headers
#define ANSI_BLUE "\033[1m\x1b[38;5;12m" // Bold bright blue for labels
#define ANSI_ORANGE "\033[1m\x1b[38;5;209m" // Bold orange for right differences
#define ANSI_GREEN "\033[1m\x1b[38;5;83m" // Bold bright green for left differences
#define ANSI_GRAY "\033[1m\x1b[38;5;240m" // Bold gray (used for "no variables" message)
#define ANSI_BOLD "\033[1m" // Standalone bold
#define ANSI_PREFIX "\033[1m\x1b[38;5;176m" // Bold color for common prefix
#define ANSI_SUFFIX "\033[1m\x1b[38;5;61m" // Bold color for common suffix
|
||||
// All template paths extracted from tests/test-chat.cpp
// NOTE(review): there is no automatic check that this list stays in sync
// with the templates exercised by tests/test-chat.cpp — update both together.
static const std::vector<std::string> ALL_TEMPLATE_PATHS = {
    "models/templates/Apertus-8B-Instruct.jinja",
    "models/templates/Apriel-1.6-15b-Thinker-fixed.jinja",
    "models/templates/ByteDance-Seed-OSS.jinja",
    "models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja",
    "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja",
    "models/templates/GLM-4.6.jinja",
    "models/templates/GLM-4.7-Flash.jinja",
    "models/templates/Kimi-K2-Instruct.jinja",
    "models/templates/Kimi-K2-Thinking.jinja",
    "models/templates/MiMo-VL.jinja",
    "models/templates/MiniMax-M2.jinja",
    "models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja",
    "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja",
    "models/templates/NVIDIA-Nemotron-Nano-v2.jinja",
    "models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja",
    "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja",
    "models/templates/Qwen-QwQ-32B.jinja",
    "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja",
    "models/templates/Qwen3-Coder.jinja",
    "models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja",
    "models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja",
    "models/templates/deepseek-ai-DeepSeek-V3.1.jinja",
    "models/templates/fireworks-ai-llama-3-firefunction-v2.jinja",
    "models/templates/google-gemma-2-2b-it.jinja",
    "models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja",
    "models/templates/llama-cpp-deepseek-r1.jinja",
    "models/templates/meetkai-functionary-medium-v3.1.jinja",
    "models/templates/meetkai-functionary-medium-v3.2.jinja",
    "models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja",
    "models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja",
    "models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja",
    "models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja",
    "models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja",
    "models/templates/moonshotai-Kimi-K2.jinja",
    "models/templates/openai-gpt-oss-120b.jinja",
    "models/templates/unsloth-Apriel-1.5.jinja",
    "models/templates/unsloth-mistral-Devstral-Small-2507.jinja",
};
|
||||
|
||||
// Parsed command-line configuration for the template-analysis tool.
struct analysis_options {
    std::vector<std::string> template_paths; // templates to analyze (--template matches / --template-file paths)
    bool analyze_all = false;                // --all: analyze every entry in ALL_TEMPLATE_PATHS
};
|
||||
|
||||
// Load the whole file at `path` into memory (opened in binary mode).
// Throws std::runtime_error on open failure.
static std::string read_file(const std::string & path) {
    std::ifstream input(path, std::ios::binary);
    if (!input.is_open()) {
        throw std::runtime_error("Could not open file: " + path);
    }
    std::ostringstream collected;
    collected << input.rdbuf();
    return collected.str();
}
|
||||
|
||||
// Print command-line usage for the template-analysis tool via LOG_ERR.
static void print_usage(const char * program_name) {
    LOG_ERR("Usage: %s [options]\n", program_name);
    LOG_ERR("\nOptions:\n");
    LOG_ERR("  --template <name> Analyze specific template from test suite (e.g., 'deepseek' or 'DeepSeek-V3.1')\n");
    LOG_ERR("  --template-file <path> Analyze custom template file\n");
    LOG_ERR("  --all Analyze all templates from test suite\n");
    LOG_ERR("\nExamples:\n");
    LOG_ERR("  %s --all\n", program_name);
    LOG_ERR("  %s --template deepseek\n", program_name);
    LOG_ERR("  %s --template-file my-template.jinja\n", program_name);
}
|
||||
|
||||
static bool parse_options(int argc, char ** argv, analysis_options & opts) {
|
||||
if (argc < 2) {
|
||||
print_usage(argv[0]);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
std::string arg = argv[i];
|
||||
|
||||
if (arg == "--all") {
|
||||
opts.analyze_all = true;
|
||||
} else if (arg == "--template") {
|
||||
if (i + 1 >= argc) {
|
||||
LOG_ERR("--template requires an argument\n");
|
||||
return false;
|
||||
}
|
||||
std::string pattern = argv[++i];
|
||||
std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
|
||||
|
||||
// Find matching templates
|
||||
bool found = false;
|
||||
for (const auto & path : ALL_TEMPLATE_PATHS) {
|
||||
std::string path_lower = path;
|
||||
std::transform(path_lower.begin(), path_lower.end(), path_lower.begin(), ::tolower);
|
||||
if (path_lower.find(pattern) != std::string::npos) {
|
||||
opts.template_paths.push_back(path);
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
LOG_ERR("No templates found matching: %s\n", pattern.c_str());
|
||||
return false;
|
||||
}
|
||||
} else if (arg == "--template-file") {
|
||||
if (i + 1 >= argc) {
|
||||
LOG_ERR("--template-file requires an argument\n");
|
||||
return false;
|
||||
}
|
||||
opts.template_paths.push_back(argv[++i]);
|
||||
} else {
|
||||
LOG_ERR("Unknown option: %s\n", arg.c_str());
|
||||
print_usage(argv[0]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (opts.analyze_all) {
|
||||
opts.template_paths = ALL_TEMPLATE_PATHS;
|
||||
}
|
||||
|
||||
if (opts.template_paths.empty()) {
|
||||
LOG_ERR("No templates specified\n");
|
||||
print_usage(argv[0]);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// One test tool ("test_function_name") with two string parameters, both
// required. NOTE(review): the debug-template-parser tool's variant marks
// only param1 required — presumably intentional, but worth confirming.
static json build_tools_definition() {
    json schema;
    schema["type"]       = "object";
    schema["properties"] = json::object();
    schema["properties"]["param1"] = json::object({
        { "type", "string" },
        { "description", "First parameter" }
    });
    schema["properties"]["param2"] = json::object({
        { "type", "string" },
        { "description", "Second parameter" }
    });
    schema["required"] = json::array({ "param1", "param2" });

    json fn;
    fn["name"]        = "test_function_name";
    fn["description"] = "A test function for debugging";
    fn["parameters"]  = schema;

    json tool;
    tool["type"]     = "function";
    tool["function"] = fn;

    return json::array({ tool });
}
|
||||
|
||||
// Helper to create a tool call with arguments as JSON object
|
||||
static json build_tool_call(const std::string & name, const json & args_object, const std::string & id = "call_001") {
|
||||
return json{
|
||||
{"id", id},
|
||||
{"type", "function"},
|
||||
{"function", json{
|
||||
{"name", name},
|
||||
{"arguments", args_object} // Pass as JSON object, not serialized string
|
||||
}}
|
||||
};
|
||||
}
|
||||
|
||||
// Helper functions to create repeating message definitions
|
||||
static json make_user_msg() {
|
||||
return json{
|
||||
{"role", "user"},
|
||||
{"content", "Hello, please help me."}
|
||||
};
|
||||
}
|
||||
|
||||
// Second user turn used after an assistant response.
static json make_user_msg2() {
    json msg;
    msg["role"]    = "user";
    msg["content"] = "Thank you.";
    return msg;
}
|
||||
|
||||
// Alternate second user turn asking the assistant to continue.
static json make_user_msg2_continue() {
    json msg;
    msg["role"]    = "user";
    msg["content"] = "Continue.";
    return msg;
}
|
||||
|
||||
// Assistant turn with plain content and no tool calls.
static json make_assistant_no_tool() {
    json msg;
    msg["role"]    = "assistant";
    msg["content"] = "Let me help you.";
    return msg;
}
|
||||
|
||||
// Assistant turn carrying exactly one tool call (content is null).
static json make_assistant_one_tool() {
    json msg;
    msg["role"]       = "assistant";
    msg["content"]    = nullptr;
    msg["tool_calls"] = json::array({
        build_tool_call("test_function_name", json::object({ { "param1", "value1" }, { "param2", "value2" } }))
    });
    return msg;
}
|
||||
|
||||
// Assistant turn carrying two parallel tool calls (distinct call ids).
static json make_assistant_two_tools() {
    json msg;
    msg["role"]       = "assistant";
    msg["content"]    = nullptr;
    msg["tool_calls"] = json::array({
        build_tool_call("test_function_name", json::object({ { "param1", "value1" }, { "param2", "value2" } })),
        build_tool_call("test_function_name", json::object({ { "param1", "value3" }, { "param2", "value4" } }), "call_002")
    });
    return msg;
}
|
||||
|
||||
// Assistant turn with content only — no reasoning_content field.
static json make_assistant_no_reasoning() {
    json msg;
    msg["role"]    = "assistant";
    msg["content"] = "I can help you with that.";
    return msg;
}
|
||||
|
||||
// Assistant turn with both content and reasoning_content.
static json make_assistant_with_reasoning() {
    json msg;
    msg["role"]              = "assistant";
    msg["content"]           = "I can help you with that.";
    msg["reasoning_content"] = "The user is asking for help. I should respond positively.";
    return msg;
}
|
||||
|
||||
// Assistant turn combining one tool call with reasoning_content
// (content stays null).
static json make_assistant_one_tool_with_reasoning() {
    json msg;
    msg["role"]       = "assistant";
    msg["content"]    = nullptr;
    msg["tool_calls"] = json::array({
        build_tool_call("test_function_name", json::object({ { "param1", "value1" }, { "param2", "value2" } }))
    });
    msg["reasoning_content"] = "I need to call the tool first.";
    return msg;
}
|
||||
|
||||
// Pretty-print a diff_split: the shared prefix/suffix plus the two
// differing middles, each in its own color.
static void print_diff_split(const std::string & title, const diff_split & diff) {
    LOG_ERR("\n%s=== %s ===%s\n", ANSI_CYAN, title.c_str(), ANSI_RESET);

    struct row {
        const char *        color;
        const char *        label;
        const std::string & text;
    };
    const row rows[] = {
        { ANSI_PREFIX, "Common Prefix:",      diff.prefix },
        { ANSI_SUFFIX, "Common Suffix:",      diff.suffix },
        { ANSI_GREEN,  "Left (difference):",  diff.left   },
        { ANSI_ORANGE, "Right (difference):", diff.right  },
    };
    for (const row & r : rows) {
        LOG_ERR("%s%s%s '%s'\n", r.color, r.label, ANSI_RESET, r.text.c_str());
    }
}
|
||||
|
||||
static void check_reasoning_variables(const common_chat_template & tmpl) {
|
||||
LOG_ERR("\n%s=== Checking Reasoning Variables ===%s\n", ANSI_CYAN, ANSI_RESET);
|
||||
|
||||
try {
|
||||
// Create a list of candidate reasoning/thinking variable names to probe
|
||||
std::vector<std::string> candidate_vars = {
|
||||
"enable_reasoning",
|
||||
"use_reasoning",
|
||||
"reasoning_enabled",
|
||||
"has_reasoning",
|
||||
"reasoning_mode",
|
||||
"reasoning_format",
|
||||
"reasoning_active",
|
||||
"with_reasoning",
|
||||
"use_thinking",
|
||||
"thinking_enabled",
|
||||
"has_thinking",
|
||||
"thinking_mode",
|
||||
"thinking_format",
|
||||
"thinking_active",
|
||||
"with_thinking",
|
||||
"enable_reason",
|
||||
"reason_enabled",
|
||||
"enable_think",
|
||||
"think_enabled",
|
||||
};
|
||||
|
||||
jinja::context ctx;
|
||||
ctx.is_get_stats = true;
|
||||
|
||||
json messages = json::array({
|
||||
json{
|
||||
{"role", "user"},
|
||||
{"content", "Test message"}
|
||||
},
|
||||
json{
|
||||
{"role", "assistant"},
|
||||
{"content", "Response"},
|
||||
{"reasoning_content", "Some reasoning"}
|
||||
}
|
||||
});
|
||||
|
||||
// Set up base context
|
||||
jinja::global_from_json(ctx, json{
|
||||
{"messages", messages},
|
||||
{"tools", json::array()},
|
||||
{"bos_token", ""},
|
||||
{"eos_token", ""},
|
||||
{"add_generation_prompt", false},
|
||||
{"enable_thinking", true} // Already passed, so we'll exclude this from results
|
||||
}, true);
|
||||
|
||||
// Add candidate variables as undefined to probe which ones are accessed
|
||||
for (const auto & var_name : candidate_vars) {
|
||||
ctx.set_val(var_name, jinja::mk_val<jinja::value_undefined_t>(var_name));
|
||||
}
|
||||
|
||||
try {
|
||||
jinja::runtime runtime(ctx);
|
||||
runtime.execute(tmpl.prog);
|
||||
} catch (const std::exception & e) {
|
||||
// Execution may fail, that's okay - we just want to see what variables were accessed
|
||||
}
|
||||
|
||||
// Check which candidate variables were accessed (stats.used = true)
|
||||
std::vector<std::string> accessed_vars;
|
||||
for (const auto & var_name : candidate_vars) {
|
||||
auto val = ctx.get_val(var_name);
|
||||
if (!val->is_undefined()) {
|
||||
// Variable was overwritten, skip it
|
||||
continue;
|
||||
}
|
||||
if (val->stats.used) {
|
||||
accessed_vars.push_back(var_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (accessed_vars.empty()) {
|
||||
LOG_ERR("%sNo reasoning/thinking-related variables were queried by the template%s\n", ANSI_GRAY, ANSI_RESET);
|
||||
} else {
|
||||
LOG_ERR("Template queries the following reasoning/thinking-related variables:\n");
|
||||
for (const auto & var : accessed_vars) {
|
||||
LOG_ERR(" %s- %s%s\n", ANSI_ORANGE, var.c_str(), ANSI_RESET);
|
||||
}
|
||||
}
|
||||
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("Error checking reasoning variables: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
static void analyze_template(const std::string & template_path) {
|
||||
LOG_ERR("\n");
|
||||
LOG_ERR("%s", ANSI_PURPLE);
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR(" ANALYZING TEMPLATE: %s\n", template_path.c_str());
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR("%s", ANSI_RESET);
|
||||
|
||||
std::string template_source;
|
||||
try {
|
||||
template_source = read_file(template_path);
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("Error reading template: %s\n", e.what());
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
common_chat_template chat_template(template_source, "", "");
|
||||
json tools = build_tools_definition();
|
||||
|
||||
// ===== CAPABILITIES ANALYSIS =====
|
||||
LOG_ERR("\n%s=== Template Capabilities (from jinja::caps) ===%s\n", ANSI_CYAN, ANSI_RESET);
|
||||
auto caps = chat_template.original_caps();
|
||||
LOG_ERR("%ssupports_tools:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tools ? "true" : "false");
|
||||
LOG_ERR("%ssupports_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tool_calls ? "true" : "false");
|
||||
LOG_ERR("%ssupports_system_role:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_system_role ? "true" : "false");
|
||||
LOG_ERR("%ssupports_parallel_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_parallel_tool_calls ? "true" : "false");
|
||||
LOG_ERR("%ssupports_typed_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_typed_content ? "true" : "false");
|
||||
LOG_ERR("%ssupports_string_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_string_content ? "true" : "false");
|
||||
|
||||
// ===== DIFFERENTIAL ANALYSIS =====
|
||||
|
||||
// Test 1: With and without tools (single user message)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
|
||||
autoparser::templates_params params_no_tools;
|
||||
params_no_tools.messages = json::array({ user_msg });
|
||||
params_no_tools.add_generation_prompt = false;
|
||||
params_no_tools.tools = json::array();
|
||||
|
||||
autoparser::templates_params params_with_tools = params_no_tools;
|
||||
params_with_tools.tools = tools;
|
||||
|
||||
std::string output_no_tools = common_chat_template_direct_apply(chat_template, params_no_tools);
|
||||
std::string output_with_tools = common_chat_template_direct_apply(chat_template, params_with_tools);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_tools, output_with_tools);
|
||||
print_diff_split("Diff: With vs Without Tools (single user message)", diff);
|
||||
}
|
||||
|
||||
// Test 2: With and without add_generation_prompt (single user message)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
|
||||
autoparser::templates_params params_no_prompt;
|
||||
params_no_prompt.messages = json::array({ user_msg });
|
||||
params_no_prompt.add_generation_prompt = false;
|
||||
params_no_prompt.tools = json::array();
|
||||
|
||||
autoparser::templates_params params_with_prompt = params_no_prompt;
|
||||
params_with_prompt.add_generation_prompt = true;
|
||||
|
||||
std::string output_no_prompt = common_chat_template_direct_apply(chat_template, params_no_prompt);
|
||||
std::string output_with_prompt = common_chat_template_direct_apply(chat_template, params_with_prompt);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_prompt, output_with_prompt);
|
||||
print_diff_split("Diff: With vs Without add_generation_prompt (single user message)", diff);
|
||||
}
|
||||
|
||||
// Test 3: Assistant with reasoning_content (user, assistant)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
|
||||
autoparser::templates_params params_no_reasoning;
|
||||
params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning() });
|
||||
params_no_reasoning.add_generation_prompt = false;
|
||||
params_no_reasoning.enable_thinking = true;
|
||||
|
||||
autoparser::templates_params params_with_reasoning = params_no_reasoning;
|
||||
params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() });
|
||||
|
||||
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
|
||||
std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
|
||||
print_diff_split("Diff: With vs Without reasoning_content (user, assistant)", diff);
|
||||
}
|
||||
|
||||
// Test 4: Assistant with reasoning_content (user, assistant, user)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
json user_msg2 = make_user_msg2();
|
||||
|
||||
autoparser::templates_params params_no_reasoning;
|
||||
params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 });
|
||||
params_no_reasoning.add_generation_prompt = false;
|
||||
params_no_reasoning.enable_thinking = true;
|
||||
|
||||
autoparser::templates_params params_with_reasoning = params_no_reasoning;
|
||||
params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 });
|
||||
|
||||
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
|
||||
std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
|
||||
print_diff_split("Diff: With vs Without reasoning_content (user, assistant, user)", diff);
|
||||
}
|
||||
|
||||
// Test 5: Tool call in last assistant message (user, assistant)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
|
||||
autoparser::templates_params params_no_tool;
|
||||
params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool() });
|
||||
params_no_tool.add_generation_prompt = false;
|
||||
params_no_tool.tools = tools;
|
||||
|
||||
autoparser::templates_params params_with_tool = params_no_tool;
|
||||
params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
|
||||
|
||||
std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
|
||||
std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_tool, output_with_tool);
|
||||
print_diff_split("Diff: With vs Without tool call (user, assistant)", diff);
|
||||
}
|
||||
|
||||
// Test 6: Tool call in last assistant message (user, assistant, user)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
json user_msg2 = make_user_msg2_continue();
|
||||
|
||||
autoparser::templates_params params_no_tool;
|
||||
params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool(), user_msg2 });
|
||||
params_no_tool.add_generation_prompt = false;
|
||||
params_no_tool.tools = tools;
|
||||
|
||||
autoparser::templates_params params_with_tool = params_no_tool;
|
||||
params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
|
||||
|
||||
std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
|
||||
std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_tool, output_with_tool);
|
||||
print_diff_split("Diff: With vs Without tool call (user, assistant, user)", diff);
|
||||
}
|
||||
|
||||
// Test 7: One vs two tool calls (user, assistant)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
|
||||
autoparser::templates_params params_one_tool;
|
||||
params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
|
||||
params_one_tool.add_generation_prompt = false;
|
||||
params_one_tool.tools = tools;
|
||||
|
||||
autoparser::templates_params params_two_tools = params_one_tool;
|
||||
params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() });
|
||||
|
||||
std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
|
||||
std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools);
|
||||
|
||||
auto diff = calculate_diff_split(output_one_tool, output_two_tools);
|
||||
print_diff_split("Diff: One vs Two tool calls (user, assistant)", diff);
|
||||
}
|
||||
|
||||
// Test 8: One vs two tool calls (user, assistant, user)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
json user_msg2 = make_user_msg2_continue();
|
||||
|
||||
autoparser::templates_params params_one_tool;
|
||||
params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
|
||||
params_one_tool.add_generation_prompt = false;
|
||||
params_one_tool.tools = tools;
|
||||
|
||||
autoparser::templates_params params_two_tools = params_one_tool;
|
||||
params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 });
|
||||
|
||||
std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
|
||||
std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools);
|
||||
|
||||
auto diff = calculate_diff_split(output_one_tool, output_two_tools);
|
||||
print_diff_split("Diff: One vs Two tool calls (user, assistant, user)", diff);
|
||||
}
|
||||
|
||||
// Test 9: Tool call with vs without reasoning_content (user, assistant)
|
||||
{
|
||||
json user_msg = make_user_msg();
|
||||
|
||||
autoparser::templates_params params_no_reasoning;
|
||||
params_no_reasoning.messages = json::array({ user_msg, make_assistant_one_tool() });
|
||||
params_no_reasoning.add_generation_prompt = false;
|
||||
params_no_reasoning.tools = tools;
|
||||
params_no_reasoning.enable_thinking = true;
|
||||
|
||||
autoparser::templates_params params_with_reasoning = params_no_reasoning;
|
||||
params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() });
|
||||
|
||||
std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
|
||||
std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
|
||||
|
||||
auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
|
||||
print_diff_split("Diff: Tool call with vs without reasoning_content (user, assistant)", diff);
|
||||
}
|
||||
|
||||
// Check reasoning variables
|
||||
check_reasoning_variables(chat_template);
|
||||
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("Analysis failed: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
// Set log level to capture all output
|
||||
common_log_set_verbosity_thold(99);
|
||||
|
||||
analysis_options opts;
|
||||
if (!parse_options(argc, argv, opts)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
LOG_ERR("\n");
|
||||
LOG_ERR("%s", ANSI_PURPLE);
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR(" TEMPLATE ANALYSIS TOOL\n");
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR("%s", ANSI_RESET);
|
||||
LOG_ERR("Analyzing %s%zu%s template(s)\n", ANSI_CYAN, opts.template_paths.size(), ANSI_RESET);
|
||||
|
||||
for (const auto & path : opts.template_paths) {
|
||||
analyze_template(path);
|
||||
}
|
||||
|
||||
LOG_ERR("\n");
|
||||
LOG_ERR("%s", ANSI_GREEN);
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR(" ANALYSIS COMPLETE\n");
|
||||
LOG_ERR("================================================================================\n");
|
||||
LOG_ERR("%s", ANSI_RESET);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -729,6 +729,10 @@ export class SchemaConverter {
|
||||
return this._addRule(ruleName, out.join(''));
|
||||
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
|
||||
return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
|
||||
} else if (schemaType === undefined && typeof schema === 'object' && !Array.isArray(schema) && schema !== null) {
|
||||
// No type constraint and no recognized structural keywords (e.g. {"description": "..."}).
|
||||
// Per JSON Schema semantics this is equivalent to {} and accepts any value.
|
||||
return this._addRule(ruleName, this._addPrimitive('value', PRIMITIVE_RULES['value']));
|
||||
} else {
|
||||
if (!(schemaType in PRIMITIVE_RULES)) {
|
||||
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#include "server-common.h"
|
||||
#include "server-task.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "llama.h"
|
||||
#include "chat.h"
|
||||
#include "common.h"
|
||||
#include "json-schema-to-grammar.h"
|
||||
#include "llama.h"
|
||||
#include "sampling.h"
|
||||
#include "speculative.h"
|
||||
#include "json-schema-to-grammar.h"
|
||||
#include "server-common.h"
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
@@ -157,7 +157,8 @@ json task_params::to_json(bool only_metrics) const {
|
||||
common_chat_msg task_result_state::update_chat_msg(
|
||||
const std::string & text_added,
|
||||
bool is_partial,
|
||||
std::vector<common_chat_msg_diff> & diffs) {
|
||||
std::vector<common_chat_msg_diff> & diffs,
|
||||
bool filter_tool_calls) {
|
||||
generated_text += text_added;
|
||||
auto msg_prv_copy = chat_msg;
|
||||
SRV_DBG("Parsing chat message: %s\n", generated_text.c_str());
|
||||
@@ -168,7 +169,64 @@ common_chat_msg task_result_state::update_chat_msg(
|
||||
if (!new_msg.empty()) {
|
||||
new_msg.set_tool_call_ids(generated_tool_call_ids, gen_tool_call_id);
|
||||
chat_msg = new_msg;
|
||||
diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, new_msg.empty() ? msg_prv_copy : new_msg);
|
||||
auto all_diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, chat_msg);
|
||||
|
||||
if (!filter_tool_calls) {
|
||||
diffs = std::move(all_diffs);
|
||||
} else {
|
||||
for (auto & d : all_diffs) {
|
||||
// If this is a new type of delta, flush all currently pending tool call names
|
||||
for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) {
|
||||
if (sent_tool_call_names.count(i) || chat_msg.tool_calls[i].name.empty()) {
|
||||
continue;
|
||||
}
|
||||
if (d.tool_call_index != i || !d.tool_call_delta.arguments.empty()) {
|
||||
common_chat_msg_diff header;
|
||||
header.tool_call_index = i;
|
||||
header.tool_call_delta.id = chat_msg.tool_calls[i].id;
|
||||
header.tool_call_delta.name = chat_msg.tool_calls[i].name;
|
||||
diffs.push_back(std::move(header));
|
||||
sent_tool_call_names.insert(i);
|
||||
}
|
||||
}
|
||||
|
||||
if (d.tool_call_index == std::string::npos) {
|
||||
diffs.push_back(std::move(d));
|
||||
} else {
|
||||
size_t i = d.tool_call_index;
|
||||
if (sent_tool_call_names.count(i)) {
|
||||
if (!d.tool_call_delta.arguments.empty()) {
|
||||
d.tool_call_delta.name = "";
|
||||
d.tool_call_delta.id = "";
|
||||
diffs.push_back(std::move(d));
|
||||
}
|
||||
} else {
|
||||
// Not sent yet.
|
||||
if (!d.tool_call_delta.arguments.empty() || !is_partial) {
|
||||
d.tool_call_delta.name = chat_msg.tool_calls[i].name;
|
||||
d.tool_call_delta.id = chat_msg.tool_calls[i].id;
|
||||
diffs.push_back(std::move(d));
|
||||
sent_tool_call_names.insert(i);
|
||||
} else {
|
||||
// Suppress
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Final check at EOF
|
||||
if (!is_partial) {
|
||||
for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) {
|
||||
if (!sent_tool_call_names.count(i) && !chat_msg.tool_calls[i].name.empty()) {
|
||||
common_chat_msg_diff header;
|
||||
header.tool_call_index = i;
|
||||
header.tool_call_delta.id = chat_msg.tool_calls[i].id;
|
||||
header.tool_call_delta.name = chat_msg.tool_calls[i].name;
|
||||
diffs.push_back(std::move(header));
|
||||
sent_tool_call_names.insert(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return chat_msg;
|
||||
}
|
||||
|
||||
@@ -98,6 +98,7 @@ struct task_result_state {
|
||||
common_chat_msg chat_msg;
|
||||
std::string generated_text; // append new chunks of generated text here
|
||||
std::vector<std::string> generated_tool_call_ids;
|
||||
std::unordered_set<size_t> sent_tool_call_names;
|
||||
|
||||
// for OpenAI Responses and Anthropic streaming API:
|
||||
// track output item / content block state across chunks
|
||||
@@ -120,7 +121,8 @@ struct task_result_state {
|
||||
common_chat_msg update_chat_msg(
|
||||
const std::string & text_added,
|
||||
bool is_partial,
|
||||
std::vector<common_chat_msg_diff> & diffs);
|
||||
std::vector<common_chat_msg_diff> & diffs,
|
||||
bool filter_tool_calls = false);
|
||||
};
|
||||
|
||||
struct server_task {
|
||||
|
||||
@@ -100,18 +100,19 @@ def do_test_completion_with_required_tool_tiny(server: ServerProcess, tool: dict
|
||||
assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}'
|
||||
# assert len(tool_call.get("id", "")) > 0, f'Expected non empty tool call id in {tool_call}'
|
||||
expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"]
|
||||
assert expected_function_name == tool_call["function"]["name"]
|
||||
assert expected_function_name == tool_call["function"]["name"], f'Expected tool name to be {tool_call["function"]["name"]} in {choice["message"]}'
|
||||
actual_arguments = tool_call["function"]["arguments"]
|
||||
assert isinstance(actual_arguments, str)
|
||||
assert isinstance(actual_arguments, dict) or isinstance(actual_arguments, str), f'Expected arguments to be a dict or str, got: {actual_arguments}'
|
||||
if argument_key is not None:
|
||||
actual_arguments = json.loads(actual_arguments)
|
||||
assert argument_key in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: {argument_key}"
|
||||
if (isinstance(actual_arguments, str)):
|
||||
actual_arguments = json.loads(actual_arguments)
|
||||
assert argument_key in actual_arguments, f"tool arguments: {actual_arguments}, expected: {argument_key}"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("stream", [CompletionMode.NORMAL, CompletionMode.STREAMED])
|
||||
@pytest.mark.parametrize("template_name,tool,argument_key", [
|
||||
("google-gemma-2-2b-it", TEST_TOOL, "success"),
|
||||
("google-gemma-2-2b-it", TEST_TOOL, "success"),
|
||||
("Qwen3-Coder", TEST_TOOL, "success"),
|
||||
("Qwen3-Coder", TEST_TOOL, "success"),
|
||||
("meta-llama-Llama-3.3-70B-Instruct", TEST_TOOL, "success"),
|
||||
("meta-llama-Llama-3.3-70B-Instruct", TEST_TOOL, "success"),
|
||||
("meta-llama-Llama-3.3-70B-Instruct", PYTHON_TOOL, "code"),
|
||||
|
||||
Reference in New Issue
Block a user