mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-03-17 16:44:07 +00:00
server: /v1/responses (partial) (#18486)
* from previous PR * Make instruction(system) as first message * Convert [input_message] (text/image/file) * Rename convert_responses_to_chatcmpl(body) -> response_body * Initial tool call support * Erase instructions field from chatcmpl body * Feed reasoning texts to chat template * Use std::vector instead of opaque json array * Make output_item.added events consistent * Move `server_task_result_cmpl_partial::update` from header to source * Match ID of output_item.added and .done events * Add function_call only if there is no "fc_" prefix * Add function call output at non-streaming API * Test if ID is persistent * Add doc * Fix style - use trailing comma * Rewrite state management * catch up with upstream/master * Fix style - "type" is the first item of SSE data * Explicitly check "instructions" from response_body * Make lambdas static * Check if reasoning content exists * Add `oai_resp_id` to task_result_state(also initialized at ctor), server_task_result_cmpl_partial, and server_task_result_cmpl_final * Reject `input_file` since it is not supported by chatcmpl * Add "fc_" prefix to non-streaming function call id as coderabbit pointed out --------- Co-authored-by: openingnow <>
This commit is contained in:
@@ -1069,6 +1069,283 @@ json oaicompat_chat_params_parse(
|
||||
return llama_params;
|
||||
}
|
||||
|
||||
// Convert an OpenAI "Responses" API request body into a Chat Completions
// request body.
//
// The returned body is a copy of `response_body` with these changes:
//   - "input" is removed and replaced by "messages"
//   - "instructions" (if present) is removed and becomes the first message,
//     with role "system"
//   - each entry of "tools" (if present) is wrapped as
//     {"type": "function", "function": {...}}, with "strict" defaulting to true
//   - "max_output_tokens" (if present) is renamed to "max_tokens"
// Every other field is passed through untouched.
//
// "input" may be a plain string (a single user message) or an array of items.
// Recognized items: input messages (user/system/developer), assistant output
// messages, function_call, function_call_output and reasoning.
//
// Throws std::invalid_argument when "input" is missing or malformed, when
// "previous_response_id" is set, when an item cannot be classified, when an
// "input_file" content part is used, or when a tool is not of type "function".
json convert_responses_to_chatcmpl(const json & response_body) {
    if (!response_body.contains("input")) {
        throw std::invalid_argument("'input' is required");
    }
    if (!json_value(response_body, "previous_response_id", std::string{}).empty()) {
        throw std::invalid_argument("llama.cpp does not support 'previous_response_id'.");
    }

    // response_body outlives this function call, so a reference is enough
    const json & input_value = response_body.at("input");
    json chatcmpl_body = response_body;
    chatcmpl_body.erase("input");
    std::vector<json> chatcmpl_messages;

    // A reasoning item is parked as a dummy assistant message with empty array
    // content until we know whether a tool call follows it.
    static auto is_reasoning_placeholder = [](const json & msg) -> bool {
        return msg.contains("role") &&
               msg.at("role") == "assistant" &&
               msg.contains("content") &&
               msg.at("content") == json::array() &&
               msg.contains("reasoning_content");
    };

    if (response_body.contains("instructions")) {
        chatcmpl_messages.push_back({
            {"role",    "system"},
            {"content", json_value(response_body, "instructions", std::string())},
        });
        chatcmpl_body.erase("instructions");
    }

    if (input_value.is_string()) {
        // #responses_create-input-text_input
        chatcmpl_messages.push_back({
            {"role",    "user"},
            {"content", input_value},
        });
    } else if (input_value.is_array()) {
        // #responses_create-input-input_item_list

        static auto exists_and_is_array = [](const json & j, const char * key) -> bool {
            return j.contains(key) && j.at(key).is_array();
        };
        static auto exists_and_is_string = [](const json & j, const char * key) -> bool {
            return j.contains(key) && j.at(key).is_string();
        };

        // Each item is copied on purpose: it is rewritten in place below
        for (json item : input_value) {
            if (exists_and_is_string(item, "content")) {
                // #responses_create-input-input_item_list-input_message-content-text_input
                // Only "Input message" contains item["content"]::string
                // After converting item["content"]::string to item["content"]::array,
                // we can treat "Input message" as sum of "Item-Input message" and "Item-Output message"
                item["content"] = json::array({
                    json {
                        {"text", item.at("content")},
                        {"type", "input_text"}
                    }
                });
            }

            if (exists_and_is_array(item, "content") &&
                exists_and_is_string(item, "role") &&
                (item.at("role") == "user" ||
                 item.at("role") == "system" ||
                 item.at("role") == "developer")
            ) {
                // #responses_create-input-input_item_list-item-input_message
                std::vector<json> chatcmpl_content;

                for (const json & input_item : item.at("content")) {
                    const std::string type = json_value(input_item, "type", std::string());

                    if (type == "input_text") {
                        if (!input_item.contains("text")) {
                            throw std::invalid_argument("'Input text' requires 'text'");
                        }
                        chatcmpl_content.push_back({
                            {"text", input_item.at("text")},
                            {"type", "text"},
                        });
                    } else if (type == "input_image") {
                        // While `detail` is marked as required,
                        // it has default value("auto") and can be omitted.

                        if (!input_item.contains("image_url")) {
                            throw std::invalid_argument("'image_url' is required");
                        }
                        chatcmpl_content.push_back({
                            {"image_url", json {
                                {"url", input_item.at("image_url")}
                            }},
                            {"type", "image_url"},
                        });
                    } else if (type == "input_file") {
                        throw std::invalid_argument("'input_file' is not supported by llamacpp at this moment");
                        // Kept for reference: the mapping to use once files are supported.
                        // if (input_item.contains("file_url")) {
                        //     // chat completion API does not support file_url
                        //     throw std::invalid_argument("'file_url' is not supported");
                        // }
                        // if (!input_item.contains("file_data") || !input_item.contains("filename")) {
                        //     throw std::invalid_argument("Both 'file_data' and 'filename' are required");
                        // }
                        // chatcmpl_content.push_back({
                        //     {"file", json {
                        //         {"file_data", input_item.at("file_data")},
                        //         {"filename",  input_item.at("filename")},
                        //     }},
                        //     {"type", "file"},
                        // });
                    } else {
                        throw std::invalid_argument("'type' must be one of 'input_text', 'input_image', or 'input_file'");
                    }
                }

                // Drop Responses-only fields; erasing a missing key is a no-op
                item.erase("type");
                item.erase("status");
                item["content"] = chatcmpl_content;

                chatcmpl_messages.push_back(item);
            } else if (exists_and_is_array(item, "content") &&
                exists_and_is_string(item, "role") &&
                item.at("role") == "assistant" &&
                // exists_and_is_string(item, "status") &&
                // (item.at("status") == "in_progress" ||
                //  item.at("status") == "completed" ||
                //  item.at("status") == "incomplete") &&
                // item["status"] not sent by codex-cli
                exists_and_is_string(item, "type") &&
                item.at("type") == "message"
            ) {
                // #responses_create-input-input_item_list-item-output_message
                std::vector<json> chatcmpl_content;

                for (const auto & output_text : item.at("content")) {
                    const std::string type = json_value(output_text, "type", std::string());
                    if (type != "output_text") {
                        throw std::invalid_argument("'type' must be 'output_text'");
                    }
                    if (!exists_and_is_string(output_text, "text")) {
                        throw std::invalid_argument("'Output text' requires 'text'");
                    }
                    // Ignore annotations and logprobs for now
                    chatcmpl_content.push_back({
                        {"text", output_text.at("text")},
                        {"type", "text"},
                    });
                }

                item.erase("status");
                item.erase("type");
                item["content"] = chatcmpl_content;
                chatcmpl_messages.push_back(item);
            } else if (exists_and_is_string(item, "arguments") &&
                exists_and_is_string(item, "call_id") &&
                exists_and_is_string(item, "name") &&
                exists_and_is_string(item, "type") &&
                item.at("type") == "function_call"
            ) {
                // #responses_create-input-input_item_list-item-function_tool_call
                json msg = json {
                    {"role", "assistant"},
                    {"tool_calls", json::array({ json {
                        {"function", json {
                            {"arguments", item.at("arguments")},
                            {"name",      item.at("name")},
                        }},
                        {"id",   item.at("call_id")},
                        {"type", "function"},
                    }})},
                };

                // Only absorb the reasoning placeholder. A preceding tool call
                // message that already absorbed reasoning also carries
                // "reasoning_content" and must not be popped, otherwise the
                // first of several consecutive tool calls would be lost.
                if (!chatcmpl_messages.empty() && is_reasoning_placeholder(chatcmpl_messages.back())) {
                    // Move reasoning content from dummy message to tool call message
                    msg["reasoning_content"] = chatcmpl_messages.back().at("reasoning_content");
                    chatcmpl_messages.pop_back();
                }
                chatcmpl_messages.push_back(msg);
            } else if (exists_and_is_string(item, "call_id") &&
                (exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) &&
                exists_and_is_string(item, "type") &&
                item.at("type") == "function_call_output"
            ) {
                // #responses_create-input-input_item_list-item-function_tool_call_output
                if (item.at("output").is_string()) {
                    chatcmpl_messages.push_back(json {
                        {"content",      item.at("output")},
                        {"role",         "tool"},
                        {"tool_call_id", item.at("call_id")},
                    });
                } else {
                    // Array output: every part must be 'input_text', renamed to 'text'
                    json chatcmpl_outputs = item.at("output");
                    for (json & chatcmpl_output : chatcmpl_outputs) {
                        if (!chatcmpl_output.contains("type") || chatcmpl_output.at("type") != "input_text") {
                            throw std::invalid_argument("Output of tool call should be 'Input text'");
                        }
                        chatcmpl_output["type"] = "text";
                    }
                    chatcmpl_messages.push_back(json {
                        {"content",      chatcmpl_outputs},
                        {"role",         "tool"},
                        {"tool_call_id", item.at("call_id")},
                    });
                }
            } else if (// exists_and_is_string(item, "id") &&
                // item["id"] not sent by codex-cli
                exists_and_is_array(item, "summary") &&
                exists_and_is_string(item, "type") &&
                item.at("type") == "reasoning") {
                // #responses_create-input-input_item_list-item-reasoning

                if (!exists_and_is_array(item, "content")) {
                    throw std::invalid_argument("item['content'] is not an array");
                }
                if (item.at("content").empty()) {
                    throw std::invalid_argument("item['content'] is empty");
                }
                if (!exists_and_is_string(item.at("content")[0], "text")) {
                    throw std::invalid_argument("item['content']['text'] is not a string");
                }

                // Pack reasoning content in dummy message
                // (only the first content entry is used)
                chatcmpl_messages.push_back(json {
                    {"role",              "assistant"},
                    {"content",           json::array()},
                    {"reasoning_content", item.at("content")[0].at("text")},
                });
            } else {
                throw std::invalid_argument("Cannot determine type of 'item'");
            }
        }
    } else {
        throw std::invalid_argument("'input' must be a string or array of objects");
    }

    // Remove unused dummy message which contains
    // reasoning content not followed by tool call
    chatcmpl_messages.erase(std::remove_if(
        chatcmpl_messages.begin(),
        chatcmpl_messages.end(),
        [](const json & x){ return is_reasoning_placeholder(x); }),
        chatcmpl_messages.end()
    );

    chatcmpl_body["messages"] = chatcmpl_messages;

    if (response_body.contains("tools")) {
        if (!response_body.at("tools").is_array()) {
            throw std::invalid_argument("'tools' must be an array of objects");
        }
        std::vector<json> chatcmpl_tools;
        // Copied per tool: the copy is edited and nested under "function"
        for (json resp_tool : response_body.at("tools")) {
            json chatcmpl_tool;

            if (json_value(resp_tool, "type", std::string()) != "function") {
                throw std::invalid_argument("'type' of tool must be 'function'");
            }
            resp_tool.erase("type");
            chatcmpl_tool["type"] = "function";

            if (!resp_tool.contains("strict")) {
                resp_tool["strict"] = true;
            }
            chatcmpl_tool["function"] = resp_tool;
            chatcmpl_tools.push_back(chatcmpl_tool);
        }
        chatcmpl_body.erase("tools");
        chatcmpl_body["tools"] = chatcmpl_tools;
    }

    if (response_body.contains("max_output_tokens")) {
        chatcmpl_body.erase("max_output_tokens");
        chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
    }

    return chatcmpl_body;
}
|
||||
|
||||
json convert_anthropic_to_oai(const json & body) {
|
||||
json oai_body;
|
||||
|
||||
@@ -1482,6 +1759,24 @@ std::string format_oai_sse(const json & data) {
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string format_oai_resp_sse(const json & data) {
|
||||
std::ostringstream ss;
|
||||
auto send_single = [&ss](const json & event_obj) {
|
||||
ss << "event: " << event_obj.at("event").get<std::string>() << "\n";
|
||||
ss << "data: " << safe_json_to_str(event_obj.at("data")) << "\n\n";
|
||||
};
|
||||
|
||||
if (data.is_array()) {
|
||||
for (const auto & item : data) {
|
||||
send_single(item);
|
||||
}
|
||||
} else {
|
||||
send_single(data);
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string format_anthropic_sse(const json & data) {
|
||||
std::ostringstream ss;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user