From 6601471aac86e53b94bab64a44f2dfd55f4a2855 Mon Sep 17 00:00:00 2001 From: bielie Date: Thu, 2 Apr 2026 17:00:00 +0800 Subject: [PATCH] fix(story-summary): stabilize L0 extraction on upstream main --- .../vector/llm/atom-extraction.js | 27 +++++++++++++------ .../story-summary/vector/llm/llm-service.js | 25 +++++++---------- .../vector/pipeline/state-integration.js | 4 +-- 3 files changed, 31 insertions(+), 25 deletions(-) diff --git a/modules/story-summary/vector/llm/atom-extraction.js b/modules/story-summary/vector/llm/atom-extraction.js index 1a600ef..d433d9f 100644 --- a/modules/story-summary/vector/llm/atom-extraction.js +++ b/modules/story-summary/vector/llm/atom-extraction.js @@ -17,8 +17,9 @@ const MODULE_ID = 'atom-extraction'; const CONCURRENCY = 10; const RETRY_COUNT = 2; const RETRY_DELAY = 500; -const DEFAULT_TIMEOUT = 20000; +const DEFAULT_TIMEOUT = 40000; const STAGGER_DELAY = 80; +const DEBUG_RAW_PREVIEW_LEN = 800; let batchCancelled = false; @@ -81,14 +82,18 @@ const SYSTEM_PROMPT = `你是场景摘要器。从一轮对话中提取1-2个场 输出: {"anchors":[{"scene":"火山口上艾拉举起圣剑刺穿古龙的心脏,龙血溅满铠甲,古龙轰然倒地,艾拉跪倒在滚烫的岩石上痛哭,完成了她不得不做的弑杀","edges":[{"s":"艾拉","t":"古龙","r":"以圣剑刺穿心脏"}],"where":"火山口"}]}`; -const JSON_PREFILL = '{"anchors":['; - // ============================================================================ // 睡眠工具 // ============================================================================ const sleep = (ms) => new Promise(r => setTimeout(r, ms)); +function previewText(text, maxLen = DEBUG_RAW_PREVIEW_LEN) { + const raw = String(text ?? '').replace(/\s+/g, ' ').trim(); + if (!raw) return '(empty)'; + return raw.length > maxLen ? `${raw.slice(0, maxLen)} ...(truncated)` : raw; +} + const ACTION_STRIP_WORDS = [ '突然', '非常', '有些', '有点', '轻轻', '悄悄', '缓缓', '立刻', '马上', '然后', '并且', '而且', '开始', '继续', '再次', '正在', @@ -206,7 +211,7 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op const aiText = filterText(aiMessage.mes); parts.push(`\n${aiText}\n`); - const input = `\n${parts.join('\n')}\n`; + const input = `\n${parts.join('\n')}\n\n请读取上述 内容,提取 1-2 个场景锚点,并严格按 JSON 输出。\n不要解释,不要续写,不要角色扮演,不要输出 JSON 以外的任何内容。`; for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) { if (batchCancelled) return []; @@ -215,7 +220,6 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op const response = await callLLM([ { role: 'system', content: SYSTEM_PROMPT }, { role: 'user', content: input }, - { role: 'assistant', content: JSON_PREFILL }, ], { temperature: 0.3, max_tokens: 600, @@ -223,6 +227,7 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op }); const rawText = String(response || ''); + xbLog.info(MODULE_ID, `floor ${aiFloor} attempt ${attempt} rawText(len=${rawText.length}): ${previewText(rawText)}`); if (!rawText.trim()) { if (attempt < RETRY_COUNT) { await sleep(RETRY_DELAY); @@ -231,11 +236,11 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op return null; } - const fullJson = JSON_PREFILL + rawText; + xbLog.info(MODULE_ID, `floor ${aiFloor} attempt ${attempt} parseSource(len=${rawText.length}): ${previewText(rawText)}`); let parsed; try { - parsed = parseJson(fullJson); + parsed = parseJson(rawText); } catch (e) { xbLog.warn(MODULE_ID, `floor ${aiFloor} JSON解析失败 (attempt ${attempt})`); if (attempt < RETRY_COUNT) { @@ -248,6 +253,7 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op // 兼容:优先 anchors,回退 atoms const rawAnchors = parsed?.anchors; if (!rawAnchors || !Array.isArray(rawAnchors)) { + xbLog.warn(MODULE_ID, `floor ${aiFloor} attempt ${attempt} 缺少有效 anchors,parsed=${previewText(JSON.stringify(parsed))}`); if (attempt < RETRY_COUNT) { await sleep(RETRY_DELAY); continue; @@ -261,6 +267,12 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op .map((a, idx) => anchorToAtom(a, aiFloor, idx)) .filter(Boolean); + xbLog.info(MODULE_ID, `floor ${aiFloor} attempt ${attempt} anchors=${rawAnchors.length} atoms=${atoms.length}`); + + if (rawAnchors.length === 0) { + return []; + } + return atoms; } catch (e) { @@ -373,4 +385,3 @@ export async function batchExtractAtoms(chat, onProgress) { return allAtoms; } - diff --git a/modules/story-summary/vector/llm/llm-service.js b/modules/story-summary/vector/llm/llm-service.js index 7120b64..6309aa0 100644 --- a/modules/story-summary/vector/llm/llm-service.js +++ b/modules/story-summary/vector/llm/llm-service.js @@ -29,12 +29,13 @@ function b64UrlEncode(str) { /** * 统一LLM调用 - 走酒馆后端(非流式) - * assistant prefill 用 bottomassistant 参数传递 + * 临时改为标准 messages 调用,避免 bottomassistant prefill 兼容性问题。 */ export async function callLLM(messages, options = {}) { const { temperature = 0.2, max_tokens = 500, + timeout = 40000, } = options; const mod = getStreamingModule(); @@ -45,14 +46,7 @@ export async function callLLM(messages, options = {}) { throw new Error('L0 requires siliconflow API key'); } - // 分离 assistant prefill - let topMessages = [...messages]; - let assistantPrefill = ''; - - if (topMessages.length > 0 && topMessages[topMessages.length - 1]?.role === 'assistant') { - const lastMsg = topMessages.pop(); - assistantPrefill = lastMsg.content || ''; - } + const topMessages = [...messages].filter(msg => msg?.role !== 'assistant'); const top64 = b64UrlEncode(JSON.stringify(topMessages)); const uniqueId = generateUniqueId('l0'); @@ -74,13 +68,14 @@ export async function callLLM(messages, options = {}) { args.enable_thinking = 'false'; } - // ★ 用 bottomassistant 参数传递 prefill - if (assistantPrefill) { - args.bottomassistant = assistantPrefill; - } - try { - const result = await mod.xbgenrawCommand(args, ''); + const timeoutPromise = new Promise((_, reject) => { + setTimeout(() => reject(new Error(`L0 request timeout after ${timeout}ms`)), timeout); + }); + const result = await Promise.race([ + mod.xbgenrawCommand(args, ''), + timeoutPromise, + ]); return String(result ?? ''); } catch (e) { xbLog.error(MODULE_ID, 'LLM调用失败', e); diff --git a/modules/story-summary/vector/pipeline/state-integration.js b/modules/story-summary/vector/pipeline/state-integration.js index bd0516b..1ebb7a0 100644 --- a/modules/story-summary/vector/pipeline/state-integration.js +++ b/modules/story-summary/vector/pipeline/state-integration.js @@ -184,7 +184,7 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options // ★ 限流检测:连续失败 N 次后暂停并降速 let consecutiveFailures = 0; let rateLimited = false; - const RATE_LIMIT_THRESHOLD = 3; // 连续失败多少次触发限流保护 + const RATE_LIMIT_THRESHOLD = 6; // 连续失败多少次触发限流保护 const RATE_LIMIT_WAIT_MS = 60000; // 限流后等待时间(60 秒) const RETRY_INTERVAL_MS = 1000; // 降速模式下每次请求间隔(1 秒) const RETRY_CONCURRENCY = 1; // ★ 降速模式下的并发数(默认1,建议不要超过5) @@ -201,7 +201,7 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options } try { - const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 }); + const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 40000 }); if (extractionCancelled) return;