// ============================================================================
// atom-extraction.js - L0 narrative anchor extraction (three-layer themes)
// ============================================================================

import { callLLM, parseJson } from './llm-service.js';
import { xbLog } from '../../../../core/debug-core.js';
import { filterText } from '../utils/text-filter.js';

const MODULE_ID = 'atom-extraction';

// Number of rounds dispatched together in one batch.
const CONCURRENCY = 10;
// Extra attempts after the first one (total attempts = RETRY_COUNT + 1).
const RETRY_COUNT = 2;
// Base delay between attempts, in milliseconds (passed to setTimeout).
const RETRY_DELAY = 500;
// Forwarded to callLLM as the `timeout` option (presumably milliseconds — confirm in llm-service).
const DEFAULT_TIMEOUT = 20000;
// Spacing between request starts inside the first batch, in milliseconds.
const STAGGER_DELAY = 80;

// Module-wide cancellation flag. It is only reset when batchExtractAtoms starts,
// so it stays true after a cancelled batch until the next batch begins.
let batchCancelled = false;

/** Requests cancellation of the running batch extraction. */
export function cancelBatchExtraction() {
    batchCancelled = true;
}

/** @returns {boolean} Whether cancellation has been requested since the last batch start. */
export function isBatchCancelled() {
    return batchCancelled;
}

// ============================================================================
// L0 extraction prompt (three-layer themes)
// ============================================================================

// NOTE(review): the prompt text below is reproduced verbatim from the current file.
// Its markdown-like layout looks flattened (section headers and list items share a
// line, and the input-format section only shows "... ...") — confirm against the
// intended prompt before relying on its exact formatting.
const SYSTEM_PROMPT = `你是叙事锚点提取器。从一轮对话中提取4-8个关键锚点，用于后续语义检索。输入格式： ... ... 只输出严格JSON： {"atoms":[{"t":"类型","s":"主体","o":"客体","v":"谓词","l":"地点","f":"来源","th":{"fn":[],"pt":[],"kw":[]}}]} ## 类型（t） - emo: 情绪状态变化 - act: 关键动作/行为 - rev: 揭示/发现/真相 - dec: 决定/承诺/宣言 - ten: 冲突/张力/对立 - loc: 场景/地点变化 ## 字段说明 - s: 主体（必填） - o: 客体（可空） - v: 谓词，15字内（必填） - l: 地点（可空） - f: "u"=用户 / "a"=角色（必填） - th: 主题标签（必填，结构化对象） ## th 三层结构 fn（叙事功能）1-2个，枚举： establish=建立设定 | escalate=升级加剧 | reveal=揭示发现 | challenge=挑战试探 commit=承诺锁定 | conflict=冲突对抗 | resolve=解决收束 | transform=转变逆转 bond=连接羁绊 | break=断裂破坏 pt（互动模式）1-3个，枚举： power_down=上对下 | power_up=下对上 | power_equal=对等 | power_contest=争夺 asymmetric=信息不对称 | witnessed=有观众 | secluded=隔绝私密 ritual=仪式正式 | routine=日常惯例 | triangular=三方介入 kw（具体关键词）1-3个，自由格式 ## 示例输出 {"atoms":[ {"t":"act","s":"艾拉","o":"古龙","v":"用圣剑刺穿心脏","l":"火山口","f":"a", "th":{"fn":["commit"],"pt":["power_down","ritual"],"kw":["战斗","牺牲"]}}, {"t":"emo","s":"林夏","o":"陆远","v":"意识到自己喜欢他","l":"","f":"a", "th":{"fn":["reveal","escalate"],"pt":["asymmetric","secluded"],"kw":["心动","暗恋"]}}, {"t":"dec","s":"凯尔","o":"王国","v":"放弃王位继承权","l":"王座厅","f":"a", 
"th":{"fn":["commit","break"],"pt":["ritual","witnessed"],"kw":["抉择","自由"]}}, {"t":"rev","s":"","o":"","v":"管家其实是间谍","l":"","f":"a", "th":{"fn":["reveal"],"pt":["asymmetric"],"kw":["背叛","真相"]}}, {"t":"ten","s":"兄弟二人","o":"","v":"为遗产反目","l":"","f":"a", "th":{"fn":["conflict","break"],"pt":["power_contest"],"kw":["冲突","亲情破裂"]}} ]} 规则： - 只提取对未来检索有价值的锚点 - fn 回答"这在故事里推动了什么" - pt 回答"这是什么结构的互动" - kw 用于细粒度检索 - 无明显锚点时返回 {"atoms":[]}`;

// Sent as the start of the assistant turn; the model's reply is appended to it
// before parsing, so the reply is expected to continue this JSON.
const JSON_PREFILL = '{"atoms":[';

// ============================================================================
// Semantic string construction
// ============================================================================

/**
 * Builds the one-line semantic description stored on an atom.
 *
 * @param {object} atom - Raw atom as returned by the model (keys t, s, o, v, l, f, th).
 * @param {string} userName - Fallback subject when the atom has no `s` and `f === 'u'`.
 * @param {string} aiName - Fallback subject when the atom has no `s` and `f !== 'u'`.
 * @returns {string} `<type> ...` description, followed by ` [tag/tag/...]` when any theme tags exist.
 */
function buildSemantic(atom, userName, aiName) {
    const type = atom.t || 'act';
    const subject = atom.s || (atom.f === 'u' ? userName : aiName);
    const object = atom.o || '';
    const verb = atom.v || '';
    const location = atom.l || '';

    // Flatten the three theme layers into a single tag list; non-array layers are ignored.
    const th = atom.th || {};
    const tags = [
        ...(Array.isArray(th.fn) ? th.fn : []),
        ...(Array.isArray(th.pt) ? th.pt : []),
        ...(Array.isArray(th.kw) ? th.kw : []),
    ].filter(Boolean);

    const typePart = `<${type}>`;
    const themePart = tags.length > 0 ? ` [${tags.join('/')}]` : '';
    const locPart = location ? ` 在${location}` : '';
    const objPart = object ? ` -> ${object}` : '';

    let semantic;
    switch (type) {
        // Emotion and decision atoms share the same layout.
        case 'emo':
        case 'dec':
            semantic = object
                ? `${typePart} ${subject} -> ${verb} (对${object})${locPart}`
                : `${typePart} ${subject} -> ${verb}${locPart}`;
            break;
        case 'rev':
            semantic = object
                ? `${typePart} 揭示: ${verb} (关于${object})${locPart}`
                : `${typePart} 揭示: ${verb}${locPart}`;
            break;
        case 'ten':
            semantic = object
                ? `${typePart} ${subject} <-> ${object}: ${verb}${locPart}`
                : `${typePart} ${subject}: ${verb}${locPart}`;
            break;
        case 'loc':
            semantic = location
                ? `${typePart} 场景: ${location} - ${verb}`
                : `${typePart} 场景: ${verb}`;
            break;
        // 'act' and any unrecognised type use the generic subject -> verb -> object layout.
        case 'act':
        default:
            semantic = `${typePart} ${subject} -> ${verb}${objPart}${locPart}`;
    }

    return semantic + themePart;
}

/**
 * Coerces a model-supplied `th` value into an object whose `fn`, `pt` and `kw`
 * are always arrays, so consumers of `themes` never see a missing or non-array layer.
 * Other own properties of a plain-object `th` are kept.
 *
 * @param {*} th - Raw `th` field from the model output.
 * @returns {{fn: Array, pt: Array, kw: Array}}
 */
function normalizeThemes(th) {
    const source = th && typeof th === 'object' && !Array.isArray(th) ? th : {};
    return {
        ...source,
        fn: Array.isArray(source.fn) ? source.fn : [],
        pt: Array.isArray(source.pt) ? source.pt : [],
        kw: Array.isArray(source.kw) ? source.kw : [],
    };
}

// ============================================================================
// Sleep helper
// ============================================================================

const sleep = (ms) => new Promise(r => setTimeout(r, ms));

// ============================================================================
// Single-round extraction (with retry)
// ============================================================================

/**
 * Extracts atoms for one user/AI round, retrying on empty replies, unparsable
 * JSON, malformed payloads and thrown errors.
 *
 * @param {object|null} userMessage - Preceding user message (`mes`, `name`), may be null.
 * @param {object} aiMessage - AI message (`mes`, `name`).
 * @param {number} aiFloor - Index of the AI message; used in atom ids and log messages.
 * @param {{timeout?: number}} [options] - `timeout` is forwarded to callLLM.
 * @returns {Promise<Array<object>|null>} Atom records; `[]` when the AI message is
 *   blank or cancellation is seen before an attempt; `null` when every attempt failed
 *   (or an attempt threw while cancelled).
 */
async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, options = {}) {
    const { timeout = DEFAULT_TIMEOUT } = options;

    if (!aiMessage?.mes?.trim()) return [];

    const userName = userMessage?.name || '用户';
    const aiName = aiMessage.name || '角色';

    // NOTE(review): the parts are wrapped only in newlines here; if the prompt expects
    // role markers around each part they are not visible in this file — confirm.
    const parts = [];
    if (userMessage?.mes?.trim()) {
        const userText = filterText(userMessage.mes);
        parts.push(`\n${userText}\n`);
    }
    const aiText = filterText(aiMessage.mes);
    parts.push(`\n${aiText}\n`);

    const input = `\n${parts.join('\n')}\n`;

    for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) {
        if (batchCancelled) return [];

        const canRetry = attempt < RETRY_COUNT;

        try {
            const response = await callLLM([
                { role: 'system', content: SYSTEM_PROMPT },
                { role: 'user', content: input },
                { role: 'assistant', content: JSON_PREFILL },
            ], {
                temperature: 0.2,
                max_tokens: 1000,
                timeout,
            });

            const rawText = String(response || '');
            if (!rawText.trim()) {
                if (canRetry) {
                    await sleep(RETRY_DELAY);
                    continue;
                }
                return null;
            }

            // The reply continues the prefill, so glue them back together before parsing.
            const fullJson = JSON_PREFILL + rawText;

            let parsed;
            try {
                parsed = parseJson(fullJson);
            } catch {
                xbLog.warn(MODULE_ID, `floor ${aiFloor} JSON解析失败`);
                if (canRetry) {
                    await sleep(RETRY_DELAY);
                    continue;
                }
                return null;
            }

            if (!Array.isArray(parsed?.atoms)) {
                if (canRetry) {
                    await sleep(RETRY_DELAY);
                    continue;
                }
                return null;
            }

            // Atoms without a type or a predicate are dropped; ids are numbered after filtering.
            return parsed.atoms
                .filter(a => a?.t && a?.v)
                .map((a, idx) => ({
                    atomId: `atom-${aiFloor}-${idx}`,
                    floor: aiFloor,
                    type: a.t,
                    subject: a.s || null,
                    object: a.o || null,
                    value: String(a.v).slice(0, 50),
                    location: a.l || null,
                    source: a.f === 'u' ? 'user' : 'ai',
                    themes: normalizeThemes(a.th),
                    semantic: buildSemantic(a, userName, aiName),
                }));
        } catch (e) {
            if (batchCancelled) return null;

            if (canRetry) {
                // Thrown errors back off linearly: 1x, 2x the base delay.
                await sleep(RETRY_DELAY * (attempt + 1));
                continue;
            }
            xbLog.error(MODULE_ID, `floor ${aiFloor} 失败`, e);
            return null;
        }
    }

    return null;
}

/**
 * Public single-round entry point; delegates to the retrying implementation.
 * Note that it observes the module-wide cancellation flag, which stays set after
 * cancelBatchExtraction() until the next batchExtractAtoms() call resets it.
 *
 * @param {object|null} userMessage
 * @param {object} aiMessage
 * @param {number} aiFloor
 * @param {{timeout?: number}} [options]
 * @returns {Promise<Array<object>|null>}
 */
export async function extractAtomsForRound(userMessage, aiMessage, aiFloor, options = {}) {
    return extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, options);
}

// ============================================================================
// Batch extraction
// ============================================================================

/**
 * Extracts atoms for every non-user message in `chat`, CONCURRENCY rounds at a time.
 * Resets the cancellation flag on entry. Once cancellation is requested, results of
 * in-flight rounds are discarded, no further progress is reported, and no further
 * batches are started; atoms collected before that point are still returned.
 *
 * @param {Array<object>} chat - Messages; entries with a truthy `is_user` are user messages.
 * @param {(completed: number, total: number, failed: number) => void} [onProgress]
 *   Called after each round that finishes while not cancelled.
 * @returns {Promise<Array<object>>} All atoms collected (completion order, not chat order).
 */
export async function batchExtractAtoms(chat, onProgress) {
    if (!chat?.length) return [];

    batchCancelled = false;

    // Pair every non-user message with the user message directly before it, if any.
    const pairs = [];
    for (let i = 0; i < chat.length; i++) {
        if (!chat[i].is_user) {
            const userMsg = (i > 0 && chat[i - 1]?.is_user) ? chat[i - 1] : null;
            pairs.push({ userMsg, aiMsg: chat[i], aiFloor: i });
        }
    }

    if (!pairs.length) return [];

    const allAtoms = [];
    let completed = 0;
    let failed = 0;

    // Runs one round and records its outcome. Only the extraction call is guarded:
    // an exception from onProgress must not be re-counted as an extraction failure.
    const processPair = async (pair, startDelay) => {
        if (startDelay > 0) await sleep(startDelay);
        if (batchCancelled) return;

        let atoms = null;
        try {
            atoms = await extractAtomsForRoundWithRetry(
                pair.userMsg,
                pair.aiMsg,
                pair.aiFloor,
                { timeout: DEFAULT_TIMEOUT }
            );
        } catch {
            // Treated the same as a null result below (counted as failed).
            atoms = null;
        }

        if (batchCancelled) return;

        if (atoms === null) {
            failed++;
        } else if (atoms?.length) {
            allAtoms.push(...atoms);
        }
        completed++;
        onProgress?.(completed, pairs.length, failed);
    };

    for (let i = 0; i < pairs.length; i += CONCURRENCY) {
        if (batchCancelled) break;

        const batch = pairs.slice(i, i + CONCURRENCY);
        // Only the very first batch staggers its request starts.
        const stagger = i === 0 ? STAGGER_DELAY : 0;

        await Promise.all(batch.map((pair, idx) => processPair(pair, idx * stagger)));

        if (i + CONCURRENCY < pairs.length && !batchCancelled) {
            await sleep(30);
        }
    }

    xbLog.info(MODULE_ID, `批量提取完成: ${allAtoms.length} atoms, ${failed} 失败`);

    return allAtoms;
}