mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-16 14:04:05 +00:00
webui: preserve partial response on streaming error (#23090)
This commit is contained in:
@@ -4,9 +4,6 @@ export const ATTACHMENT_SAVED_REGEX = /\[Attachment saved: ([^\]]+)\]/;
|
||||
|
||||
export const NEWLINE_SEPARATOR = '\n';
|
||||
|
||||
export const LLM_ERROR_BLOCK_START = '\n\n```\nUpstream LLM error:\n';
|
||||
export const LLM_ERROR_BLOCK_END = '\n```\n';
|
||||
|
||||
export const DEFAULT_AGENTIC_CONFIG: AgenticConfig = {
|
||||
enabled: true,
|
||||
maxTurns: 100,
|
||||
|
||||
@@ -30,12 +30,7 @@ import { ToolSource, ToolPermissionDecision } from '$lib/enums';
|
||||
import { SvelteMap } from 'svelte/reactivity';
|
||||
import { ToolsService } from '$lib/services/tools.service';
|
||||
import { isAbortError } from '$lib/utils';
|
||||
import {
|
||||
DEFAULT_AGENTIC_CONFIG,
|
||||
NEWLINE_SEPARATOR,
|
||||
LLM_ERROR_BLOCK_START,
|
||||
LLM_ERROR_BLOCK_END
|
||||
} from '$lib/constants';
|
||||
import { DEFAULT_AGENTIC_CONFIG, NEWLINE_SEPARATOR } from '$lib/constants';
|
||||
import {
|
||||
IMAGE_MIME_TO_EXTENSION,
|
||||
DATA_URI_BASE64_REGEX,
|
||||
@@ -640,10 +635,9 @@ class AgenticStore {
|
||||
return;
|
||||
}
|
||||
const normalizedError = error instanceof Error ? error : new Error('LLM stream error');
|
||||
// Save error as content in the current turn
|
||||
onChunk?.(`${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`);
|
||||
// preserve partial output as is, the outer error dialog informs the user separately
|
||||
await onAssistantTurnComplete?.(
|
||||
turnContent + `${LLM_ERROR_BLOCK_START}${normalizedError.message}${LLM_ERROR_BLOCK_END}`,
|
||||
turnContent,
|
||||
turnReasoningContent || undefined,
|
||||
this.buildFinalTimings(capturedTimings, agenticTimings),
|
||||
undefined
|
||||
|
||||
@@ -814,7 +814,7 @@ class ChatStore {
|
||||
);
|
||||
}
|
||||
},
|
||||
onError: (error: Error) => {
|
||||
onError: async (error: Error) => {
|
||||
this.setStreamingActive(false);
|
||||
if (isAbortError(error)) {
|
||||
cleanupStreamingState();
|
||||
@@ -826,13 +826,10 @@ class ChatStore {
|
||||
return;
|
||||
}
|
||||
console.error('Streaming error:', error);
|
||||
// keep whatever was streamed so far, the message stays in memory and in DB
|
||||
await this.savePartialResponseIfNeeded(convId);
|
||||
cleanupStreamingState();
|
||||
this.clearPendingMessage(convId);
|
||||
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
|
||||
if (idx !== -1) {
|
||||
const failedMessage = conversationsStore.removeMessageAtIndex(idx);
|
||||
if (failedMessage) DatabaseService.deleteMessage(failedMessage.id).catch(console.error);
|
||||
}
|
||||
const contextInfo = (
|
||||
error as Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } }
|
||||
).contextInfo;
|
||||
@@ -1389,9 +1386,17 @@ class ChatStore {
|
||||
}
|
||||
|
||||
console.error('Continue generation error:', error);
|
||||
conversationsStore.updateMessageAtIndex(idx, { content: originalContent });
|
||||
|
||||
await DatabaseService.updateMessage(msg.id, { content: originalContent });
|
||||
// keep whatever was appended so far, the message stays in memory and in DB
|
||||
await DatabaseService.updateMessage(msg.id, {
|
||||
content: originalContent + appendedContent,
|
||||
reasoningContent: originalReasoning + appendedReasoning || undefined,
|
||||
timestamp: Date.now()
|
||||
});
|
||||
conversationsStore.updateMessageAtIndex(idx, {
|
||||
content: originalContent + appendedContent,
|
||||
reasoningContent: originalReasoning + appendedReasoning || undefined,
|
||||
timestamp: Date.now()
|
||||
});
|
||||
|
||||
this.setChatLoading(msg.convId, false);
|
||||
this.clearChatStreaming(msg.convId);
|
||||
|
||||
Reference in New Issue
Block a user