From d3d818da6a1b1266b1c516f9ed158a088e1b155d Mon Sep 17 00:00:00 2001 From: bielie Date: Sun, 8 Feb 2026 12:22:45 +0800 Subject: [PATCH] chore: update story summary and lint fixes --- modules/story-summary/generate/llm.js | 43 +- modules/story-summary/generate/prompt.js | 653 ++++++---- modules/story-summary/story-summary-ui.js | 55 +- modules/story-summary/story-summary.css | 25 + modules/story-summary/story-summary.html | 15 +- modules/story-summary/story-summary.js | 16 +- .../vector/llm/atom-extraction.js | 225 +++- .../story-summary/vector/llm/llm-service.js | 25 +- .../vector/llm/query-expansion.js | 313 ++++- modules/story-summary/vector/llm/reranker.js | 184 +++ .../vector/pipeline/state-integration.js | 202 ++- .../vector/pipeline/state-recall.js | 43 +- .../story-summary/vector/retrieval/metrics.js | 388 ++++++ .../story-summary/vector/retrieval/recall.js | 1108 ++++++++++------- modules/streaming-generation.js | 36 +- 15 files changed, 2479 insertions(+), 852 deletions(-) create mode 100644 modules/story-summary/vector/llm/reranker.js create mode 100644 modules/story-summary/vector/retrieval/metrics.js diff --git a/modules/story-summary/generate/llm.js b/modules/story-summary/generate/llm.js index 227d1e5..4371a61 100644 --- a/modules/story-summary/generate/llm.js +++ b/modules/story-summary/generate/llm.js @@ -11,6 +11,8 @@ const PROVIDER_MAP = { custom: "custom", }; +const JSON_PREFILL = '{"mindful_prelude": {'; + const LLM_PROMPT_CONFIG = { topSystem: `Story Analyst: This task involves narrative comprehension and structured incremental summarization, representing creative story analysis at the intersection of plot tracking and character development. As a story analyst, you will conduct systematic evaluation of provided dialogue content to generate structured incremental summary data. [Read the settings for this task] @@ -161,6 +163,16 @@ Before generating, observe the USER and analyze carefully: - What arc PROGRESS was made? - What facts changed? (status/position/ownership/relationships) +## factUpdates 规则 +- 目的: 纠错 & 世界一致性约束,只记录硬性事实 +- s+p 为键,相同键会覆盖旧值 +- isState: true=核心约束(位置/身份/生死/关系),false=有容量上限会被清理 +- 关系类: p="对X的看法",trend 必填(破裂|厌恶|反感|陌生|投缘|亲密|交融) +- 删除: {s, p, retracted: true},不需要 o 字段 +- 更新: {s, p, o, isState, trend?} +- 谓词规范化: 复用已有谓词,不要发明同义词 +- 只输出有变化的条目,确保少、硬、稳定 + ## Output Format \`\`\`json { @@ -170,7 +182,7 @@ Before generating, observe the USER and analyze carefully: "fact_changes": "识别到的事实变化概述" }, "keywords": [ - {"text": "综合已有+新内容的全局关键词(5-10个)", "weight": "核心|重要|一般"} + {"text": "综合历史+新内容的全剧情关键词(5-10个)", "weight": "核心|重要|一般"} ], "events": [ { @@ -178,7 +190,7 @@ Before generating, observe the USER and analyze carefully: "title": "地点·事件标题", "timeLabel": "时间线标签(如:开场、第二天晚上)", "summary": "1-2句话描述,涵盖丰富信息素,末尾标注楼层(#X-Y)", - "participants": ["参与角色名"], + "participants": ["参与角色名,不要使用人称代词或别名,只用正式人名"], "type": "相遇|冲突|揭示|抉择|羁绊|转变|收束|日常", "weight": "核心|主线|转折|点睛|氛围", "causedBy": ["evt-12", "evt-14"] @@ -186,30 +198,15 @@ Before generating, observe the USER and analyze carefully: ], "newCharacters": ["仅本次首次出现的角色名"], "arcUpdates": [ - {"name": "角色名", "trajectory": "当前阶段描述(15字内)", "progress": 0.0-1.0, "newMoment": "本次新增的关键时刻"} + {"name": "角色名,不要使用人称代词或别名,只用正式人名", "trajectory": "当前阶段描述(15字内)", "progress": 0.0-1.0, "newMoment": "本次新增的关键时刻"} ], - "factUpdates": [ - { - "s": "主体", - "p": "谓词(复用已有谓词,避免同义词)", - "o": "当前值", - "isState": true/false, - "trend": "仅关系类:破裂|厌恶|反感|陌生|投缘|亲密|交融" - } + "factUpdates": [ + {"s": "主体", "p": "谓词", "o": "当前值", "isState": true, "trend": "仅关系类填"}, + {"s": "要删除的主体", "p": "要删除的谓词", "retracted": true} ] } - - \`\`\` -## factUpdates 规则 -- 目的: 纠错 & 世界一致性约束,只记录硬性事实 -- s+p 为键,相同键会覆盖旧值 -- isState: true=核心约束(位置/身份/生死/关系),false=有容量上限会被清理 -- 关系类: p="对X的看法",trend 必填 -- 删除: 设置 retracted: true -- 谓词规范化: 复用已有谓词,不要发明同义词 -- 只输出有变化的条目,确保少、硬、稳定 ## CRITICAL NOTES - events.id 从 evt-{nextEventId} 开始编号 - 仅输出【增量】内容,已有事件绝不重复 @@ -242,7 +239,7 @@ All checks passed. Beginning incremental extraction... userConfirm: `怎么截断了!重新完整生成,只输出JSON,不要任何其他内容 `, - assistantPrefill: `非常抱歉!现在重新完整生成JSON。` + assistantPrefill: JSON_PREFILL }; // ═══════════════════════════════════════════════════════════════════════════ @@ -437,5 +434,5 @@ export async function generateSummary(options) { console.log(rawOutput); console.groupEnd(); - return rawOutput; + return JSON_PREFILL + rawOutput; } diff --git a/modules/story-summary/generate/prompt.js b/modules/story-summary/generate/prompt.js index c5d007f..a30465b 100644 --- a/modules/story-summary/generate/prompt.js +++ b/modules/story-summary/generate/prompt.js @@ -1,7 +1,7 @@ -// ═══════════════════════════════════════════════════════════════════════════ -// Story Summary - Prompt Injection (Final Clean Version) +// ═══════════════════════════════════════════════════════════════════════════ +// Story Summary - Prompt Injection (v2 - DSL 版) // - 仅负责"构建注入文本",不负责写入 extension_prompts -// - 注入发生在 story-summary.js:GENERATION_STARTED 时写入 extension_prompts(IN_CHAT + depth) +// - 注入发生在 story-summary.js:GENERATION_STARTED 时写入 extension_prompts // ═══════════════════════════════════════════════════════════════════════════ import { getContext } from "../../../../../../extensions.js"; @@ -11,15 +11,22 @@ import { getVectorConfig, getSummaryPanelConfig, getSettings } from "../data/con import { recallMemory, buildQueryText } from "../vector/retrieval/recall.js"; import { getChunksByFloors, getAllChunkVectors, getAllEventVectors, getMeta } from "../vector/storage/chunk-store.js"; +// METRICS +import { formatMetricsLog, detectIssues } from "../vector/retrieval/metrics.js"; + const MODULE_ID = "summaryPrompt"; // ───────────────────────────────────────────────────────────────────────────── -// 召回失败提示节流(避免连续生成刷屏) +// 召回失败提示节流 // ───────────────────────────────────────────────────────────────────────────── let lastRecallFailAt = 0; const RECALL_FAIL_COOLDOWN_MS = 10_000; +/** + * 检查是否可以通知召回失败 + * @returns {boolean} + */ function canNotifyRecallFail() { const now = Date.now(); if (now - lastRecallFailAt < RECALL_FAIL_COOLDOWN_MS) return false; @@ -28,21 +35,26 @@ function canNotifyRecallFail() { } // ───────────────────────────────────────────────────────────────────────────── -// 预算常量(向量模式使用) +// 预算常量 // ───────────────────────────────────────────────────────────────────────────── -const MAIN_BUDGET_MAX = 10000; // 主装配预算(世界/事件/远期/弧光) -const ORPHAN_MAX = 2500; // 远期上限 -const RECENT_ORPHAN_MAX = 5000; // [待整理] 独立预算 -const TOTAL_BUDGET_MAX = 15000; // 总预算(用于日志显示) -const L3_MAX = 2000; +const MAIN_BUDGET_MAX = 10000; +const ORPHAN_MAX = 2500; +const RECENT_ORPHAN_MAX = 5000; +const TOTAL_BUDGET_MAX = 15000; +const L1_MAX = 2000; const ARCS_MAX = 1500; -const TOP_N_STAR = 5; // 相似度前N条加⭐ +const TOP_N_STAR = 5; // ───────────────────────────────────────────────────────────────────────────── // 工具函数 // ───────────────────────────────────────────────────────────────────────────── +/** + * 估算 token 数量 + * @param {string} text - 文本 + * @returns {number} token 数 + */ function estimateTokens(text) { if (!text) return 0; const s = String(text); @@ -50,6 +62,13 @@ function estimateTokens(text) { return Math.ceil(zh + (s.length - zh) / 4); } +/** + * 带预算控制的行推入 + * @param {Array} lines - 行数组 + * @param {string} text - 文本 + * @param {object} state - 预算状态 {used, max} + * @returns {boolean} 是否成功 + */ function pushWithBudget(lines, text, state) { const t = estimateTokens(text); if (state.used + t > state.max) return false; @@ -58,6 +77,12 @@ function pushWithBudget(lines, text, state) { return true; } +/** + * 计算余弦相似度 + * @param {Array} a - 向量 a + * @param {Array} b - 向量 b + * @returns {number} 相似度 + */ function cosineSimilarity(a, b) { if (!a?.length || !b?.length || a.length !== b.length) return 0; let dot = 0, nA = 0, nB = 0; @@ -69,7 +94,11 @@ function cosineSimilarity(a, b) { return nA && nB ? dot / (Math.sqrt(nA) * Math.sqrt(nB)) : 0; } -// 从 summary 解析楼层范围:(#321-322) 或 (#321) +/** + * 解析楼层范围 + * @param {string} summary - 摘要文本 + * @returns {object|null} {start, end} + */ function parseFloorRange(summary) { if (!summary) return null; const match = String(summary).match(/\(#(\d+)(?:-(\d+))?\)/); @@ -79,42 +108,63 @@ function parseFloorRange(summary) { return { start, end }; } -// 去掉 summary 末尾楼层标记(按你要求:事件本体不显示楼层范围) +/** + * 清理摘要中的楼层标记 + * @param {string} summary - 摘要文本 + * @returns {string} 清理后的文本 + */ function cleanSummary(summary) { return String(summary || "") .replace(/\s*\(#\d+(?:-\d+)?\)\s*$/, "") .trim(); } +/** + * 规范化字符串(用于比较) + * @param {string} s - 字符串 + * @returns {string} 规范化后的字符串 + */ +function normalize(s) { + return String(s || '') + .normalize('NFKC') + .replace(/[\u200B-\u200D\uFEFF]/g, '') + .trim() + .toLowerCase(); +} + // ───────────────────────────────────────────────────────────────────────────── // 上下文配对工具函数 // ───────────────────────────────────────────────────────────────────────────── /** - * 获取chunk的配对楼层 - * USER楼层 → 下一楼(AI回复) - * AI楼层 → 上一楼(USER发言) + * 获取上下文楼层 + * @param {object} chunk - chunk 对象 + * @returns {number} 配对楼层,-1 表示无效 */ function getContextFloor(chunk) { - if (chunk.isL0) return -1; // L0虚拟chunk不需要配对 + if (chunk.isL0) return -1; return chunk.isUser ? chunk.floor + 1 : chunk.floor - 1; } /** - * 从候选chunks中选择最佳配对 - * 策略:优先选择相反角色的第一个chunk + * 选择配对 chunk + * @param {Array} candidates - 候选 chunks + * @param {object} mainChunk - 主 chunk + * @returns {object|null} 配对 chunk */ function pickContextChunk(candidates, mainChunk) { if (!candidates?.length) return null; const targetIsUser = !mainChunk.isUser; - // 优先相反角色 const opposite = candidates.find(c => c.isUser === targetIsUser); if (opposite) return opposite; - // 否则选第一个 return candidates[0]; } + /** - * 格式化配对chunk(完整显示,带缩进和方向符号) + * 格式化上下文 chunk 行 + * @param {object} chunk - chunk 对象 + * @param {boolean} isAbove - 是否在主 chunk 上方 + * @returns {string} 格式化的行 */ function formatContextChunkLine(chunk, isAbove) { const { name1, name2 } = getContext(); @@ -124,15 +174,14 @@ function formatContextChunkLine(chunk, isAbove) { return ` ${symbol} #${chunk.floor + 1} [${speaker}] ${text}`; } -/** - * 格式化配对chunk(缩进,简短摘要) - */ - - // ───────────────────────────────────────────────────────────────────────────── // 系统前导与后缀 // ───────────────────────────────────────────────────────────────────────────── +/** + * 构建系统前导 + * @returns {string} + */ function buildSystemPreamble() { return [ "以上是还留在眼前的对话", @@ -144,6 +193,10 @@ function buildSystemPreamble() { ].join("\n"); } +/** + * 构建后缀 + * @returns {string} + */ function buildPostscript() { return [ "", @@ -152,13 +205,110 @@ function buildPostscript() { } // ───────────────────────────────────────────────────────────────────────────── -// 格式化函数 +// L1 Facts 分层过滤 // ───────────────────────────────────────────────────────────────────────────── -function formatFactsForInjection(facts) { - const activeFacts = (facts || []).filter(f => !f.retracted); - if (!activeFacts.length) return []; - return activeFacts +/** + * 从 store 获取所有已知角色名 + * @param {object} store - summary store + * @returns {Set} 角色名集合(规范化后) + */ +function getKnownCharacters(store) { + const names = new Set(); + + // 从 arcs 获取 + const arcs = store?.json?.arcs || []; + for (const a of arcs) { + if (a.name) names.add(normalize(a.name)); + } + + // 从 characters.main 获取 + const main = store?.json?.characters?.main || []; + for (const m of main) { + const name = typeof m === 'string' ? m : m.name; + if (name) names.add(normalize(name)); + } + + // 从当前角色获取 + const { name1, name2 } = getContext(); + if (name1) names.add(normalize(name1)); + if (name2) names.add(normalize(name2)); + + return names; +} + +/** + * 解析关系类 fact 的目标人物 + * @param {string} predicate - 谓词,如 "对蓝袖的看法" + * @returns {string|null} 目标人物名 + */ +function parseRelationTarget(predicate) { + const match = String(predicate || '').match(/^对(.+)的/); + return match ? match[1] : null; +} + +/** + * 过滤 facts(分层策略) + * + * 规则: + * - isState=true:全量保留 + * - 关系类(谓词匹配 /^对.+的/):from 或 to 在 focus 中 + * - 人物状态类(主体是已知角色名):主体在 focus 中 + * - 其他(物品/地点/规则):全量保留 + * + * @param {Array} facts - 所有 facts + * @param {Array} focusEntities - 焦点实体 + * @param {Set} knownCharacters - 已知角色名集合 + * @returns {Array} 过滤后的 facts + */ +function filterFactsByRelevance(facts, focusEntities, knownCharacters) { + if (!facts?.length) return []; + + const focusSet = new Set((focusEntities || []).map(normalize)); + + return facts.filter(f => { + // 1. isState=true:全量保留 + if (f._isState === true) return true; + + // 2. 关系类:from 或 to 在 focus 中 + if (isRelationFact(f)) { + const from = normalize(f.s); + const target = parseRelationTarget(f.p); + const to = target ? normalize(target) : ''; + + // 任一方在 focus 中即保留 + if (focusSet.has(from) || focusSet.has(to)) return true; + + // 都不在 focus 中则过滤 + return false; + } + + // 3. 主体是已知角色名:检查是否在 focus 中 + const subjectNorm = normalize(f.s); + if (knownCharacters.has(subjectNorm)) { + return focusSet.has(subjectNorm); + } + + // 4. 主体不是人名(物品/地点/规则等):保留 + return true; + }); +} + +/** + * 格式化 facts 用于注入 + * @param {Array} facts - facts 数组 + * @param {Array} focusEntities - 焦点实体 + * @param {Set} knownCharacters - 已知角色名集合 + * @returns {Array} 格式化后的行 + */ +function formatFactsForInjection(facts, focusEntities, knownCharacters) { + // 先过滤 + const filtered = filterFactsByRelevance(facts, focusEntities, knownCharacters); + + if (!filtered.length) return []; + + // 按 since 降序排序(最新的优先) + return filtered .sort((a, b) => (b.since || 0) - (a.since || 0)) .map(f => { const since = f.since ? ` (#${f.since + 1})` : ''; @@ -169,6 +319,15 @@ function formatFactsForInjection(facts) { }); } +// ───────────────────────────────────────────────────────────────────────────── +// 格式化函数 +// ───────────────────────────────────────────────────────────────────────────── + +/** + * 格式化角色弧光行 + * @param {object} a - 弧光对象 + * @returns {string} + */ function formatArcLine(a) { const moments = (a.moments || []) .map(m => (typeof m === "string" ? m : m.text)) @@ -180,23 +339,27 @@ function formatArcLine(a) { return `- ${a.name}:${a.trajectory}`; } -// 完整 chunk 输出(支持 L0 虚拟 chunk) +/** + * 格式化 chunk 完整行 + * @param {object} c - chunk 对象 + * @returns {string} + */ function formatChunkFullLine(c) { const { name1, name2 } = getContext(); - // L0 虚拟 chunk if (c.isL0) { return `› #${c.floor + 1} [📌] ${String(c.text || "").trim()}`; } - // L1 真实 chunk - const speaker = c.isUser ? (name1 || "用户") : (name2 || "角色"); + const speaker = c.isUser ? (name1 || "用户") : (c.speaker || name2 || "角色"); return `› #${c.floor + 1} [${speaker}] ${String(c.text || "").trim()}`; } /** - * 格式化chunk及其配对上下文 - * 返回数组:[配对行(如果在前), 主chunk行, 配对行(如果在后)] + * 格式化带上下文的 chunk + * @param {object} mainChunk - 主 chunk + * @param {object|null} contextChunk - 上下文 chunk + * @returns {Array} 格式化的行数组 */ function formatChunkWithContext(mainChunk, contextChunk) { const lines = []; @@ -218,7 +381,12 @@ function formatChunkWithContext(mainChunk, contextChunk) { return lines; } -// 因果事件格式(仅作为"前因线索"展示,仍保留楼层提示) +/** + * 格式化因果事件行 + * @param {object} causalItem - 因果项 + * @param {Map} causalById - 因果映射 + * @returns {string} + */ function formatCausalEventLine(causalItem, causalById) { const ev = causalItem?.event || {}; const depth = Math.max(1, Math.min(9, causalItem?._causalDepth || 1)); @@ -246,101 +414,56 @@ function formatCausalEventLine(causalItem, causalById) { return lines.join("\n"); } -// ───────────────────────────────────────────────────────────────────────────── -// 装配日志(开发调试用) -// ───────────────────────────────────────────────────────────────────────────── -function formatInjectionLog(stats, details, recentOrphanStats = null) { - const pct = (n, d) => (d > 0 ? Math.round((n / d) * 100) : 0); - - const lines = [ - '', - '┌─────────────────────────────────────────────────────────────┐', - '│ 【装配统计】 │', - '└─────────────────────────────────────────────────────────────┘', - ` 总预算: ${stats.budget.max} tokens | 已使用: ${stats.budget.used} tokens (${pct(stats.budget.used, stats.budget.max)}%)`, - '', - ]; - - // [1] 世界约束 - lines.push(` [1] 世界约束 (上限 2000)`); - lines.push(` 选入: ${stats.facts.count} 条 | 消耗: ${stats.facts.tokens} tokens`); - lines.push(''); - - // [2] 核心经历 + 过往背景 - lines.push(` [2] 核心经历 + 过往背景`); - lines.push(` 事件: ${stats.events.selected} 条 | 消耗: ${stats.events.tokens} tokens`); - - // 证据统计(区分 L0 和 L1) - const l0EvidenceCount = details.eventList?.filter(e => e.hasL0Evidence)?.length || 0; - const l1EvidenceCount = (stats.evidence.attached || 0) - l0EvidenceCount; - lines.push(` 证据: ${stats.evidence.attached} 条 (L0: ${l0EvidenceCount}, L1: ${l1EvidenceCount}) | 消耗: ${stats.evidence.tokens} tokens`); - lines.push(` 核心: ${details.directCount || 0} 条 | 过往: ${details.similarCount || 0} 条`); - lines.push(''); - - // [3] 远期片段 - const l0OrphanCount = stats.orphans.l0Count || 0; - const l1OrphanCount = (stats.orphans.injected || 0) - l0OrphanCount; - lines.push(` [3] 远期片段 (已总结范围)`); - lines.push(` 选入: ${stats.orphans.injected} 条 (L0: ${l0OrphanCount}, L1: ${l1OrphanCount}) | 消耗: ${stats.orphans.tokens} tokens`); - lines.push(` 配对: ${stats.orphans.contextPairs || 0} 条`); - lines.push(''); - - // [4] 待整理 - lines.push(` [4] 待整理 (独立预算 5000)`); - lines.push(` 选入: ${recentOrphanStats?.injected || 0} 条 | 消耗: ${recentOrphanStats?.tokens || 0} tokens`); - lines.push(` 配对: ${recentOrphanStats?.contextPairs || 0} 条`); - lines.push(` 楼层: ${recentOrphanStats?.floorRange || 'N/A'}`); - lines.push(''); - - // [5] 人物弧光 - lines.push(` [5] 人物弧光 (上限 1500)`); - lines.push(` 选入: ${stats.arcs.count} 条 | 消耗: ${stats.arcs.tokens} tokens`); - lines.push(''); - - // 预算条形图 - lines.push(' 【预算分布】'); - const total = stats.budget.max; - const bar = (tokens, label) => { - const width = Math.round((tokens / total) * 30); - const pctStr = pct(tokens, total) + '%'; - return ` ${label.padEnd(6)} ${'█'.repeat(width).padEnd(30)} ${String(tokens).padStart(5)} (${pctStr})`; - }; - lines.push(bar(stats.facts.tokens, '约束')); - lines.push(bar(stats.events.tokens + stats.evidence.tokens, '经历')); - lines.push(bar(stats.orphans.tokens, '远期')); - lines.push(bar(recentOrphanStats?.tokens || 0, '待整理')); - lines.push(bar(stats.arcs.tokens, '弧光')); - lines.push(bar(stats.budget.max - stats.budget.used, '剩余')); - lines.push(''); - - return lines.join('\n'); -} - -// 重写事件文本里的序号前缀:把 "{idx}. " 或 "{idx}.【...】" 的 idx 替换 +/** + * 重新编号事件文本 + * @param {string} text - 事件文本 + * @param {number} newIndex - 新编号 + * @returns {string} + */ function renumberEventText(text, newIndex) { const s = String(text || ""); - // 匹配行首: "12." 或 "12.【" return s.replace(/^(\s*)\d+(\.\s*(?:【)?)/, `$1${newIndex}$2`); } +/** + * 获取事件排序键 + * @param {object} ev - 事件对象 + * @returns {number} + */ function getEventSortKey(ev) { const r = parseFloorRange(ev?.summary); - if (r) return r.start; // 按事件出现楼层排序(最靠谱) + if (r) return r.start; const m = String(ev?.id || "").match(/evt-(\d+)/); return m ? parseInt(m[1], 10) : Number.MAX_SAFE_INTEGER; } // ───────────────────────────────────────────────────────────────────────────── -// 非向量模式:全量总结注入(世界 + 事件 + 弧光) -// 仅在 GENERATION_STARTED 调用 +// 非向量模式 // ───────────────────────────────────────────────────────────────────────────── +/** + * 构建非向量模式的 prompt + * @param {object} store - summary store + * @returns {string} + */ function buildNonVectorPrompt(store) { const data = store.json || {}; const sections = []; - const factLines = formatFactsForInjection(getFacts(store)); + // L1 facts(非向量模式不做分层过滤,全量注入) + const allFacts = getFacts(); + const factLines = allFacts + .filter(f => !f.retracted) + .sort((a, b) => (b.since || 0) - (a.since || 0)) + .map(f => { + const since = f.since ? ` (#${f.since + 1})` : ''; + if (isRelationFact(f) && f.trend) { + return `- ${f.s} ${f.p}: ${f.o} [${f.trend}]${since}`; + } + return `- ${f.s}的${f.p}: ${f.o}${since}`; + }); + if (factLines.length) { sections.push(`[定了的事] 已确立的事实\n${factLines.join("\n")}`); } @@ -371,6 +494,10 @@ function buildNonVectorPrompt(store) { ); } +/** + * 构建非向量模式的注入文本 + * @returns {string} + */ export function buildNonVectorPromptText() { if (!getSettings().storySummary?.enabled) { return ""; @@ -386,7 +513,6 @@ export function buildNonVectorPromptText() { return ""; } - // wrapper(沿用面板设置) const cfg = getSummaryPanelConfig(); if (cfg.trigger?.wrapperHead) text = cfg.trigger.wrapperHead + "\n" + text; if (cfg.trigger?.wrapperTail) text = text + "\n" + cfg.trigger.wrapperTail; @@ -395,18 +521,27 @@ export function buildNonVectorPromptText() { } // ───────────────────────────────────────────────────────────────────────────── -// 向量模式:预算装配(世界 → 事件(带证据) → 碎片 → 弧光) +// 向量模式:预算装配 // ───────────────────────────────────────────────────────────────────────────── -async function buildVectorPrompt(store, recallResult, causalById, queryEntities = [], meta = null) { +/** + * 构建向量模式的 prompt + * @param {object} store - summary store + * @param {object} recallResult - 召回结果 + * @param {Map} causalById - 因果映射 + * @param {Array} focusEntities - 焦点实体 + * @param {object} meta - 元数据 + * @param {object} metrics - 指标对象 + * @returns {Promise} {promptText, injectionLogText, injectionStats, metrics} + */ +async function buildVectorPrompt(store, recallResult, causalById, focusEntities = [], meta = null, metrics = null) { + const T_Start = performance.now(); + const { chatId } = getContext(); const data = store.json || {}; const total = { used: 0, max: MAIN_BUDGET_MAX }; - // ═══════════════════════════════════════════════════════════════════ - // 预装配各层内容(先计算预算,后按顺序拼接) - // ═══════════════════════════════════════════════════════════════════ - + // 预装配容器 const assembled = { facts: { lines: [], tokens: 0 }, arcs: { lines: [], tokens: 0 }, @@ -417,7 +552,7 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities const injectionStats = { budget: { max: TOTAL_BUDGET_MAX, used: 0 }, - facts: { count: 0, tokens: 0 }, + facts: { count: 0, tokens: 0, filtered: 0 }, arcs: { count: 0, tokens: 0 }, events: { selected: 0, tokens: 0 }, evidence: { attached: 0, tokens: 0 }, @@ -430,37 +565,63 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities floorRange: "N/A", contextPairs: 0, }; + const details = { eventList: [], directCount: 0, similarCount: 0, }; - // ═══════════════════════════════════════════════════════════════════ - // [优先级 1] 世界约束 - 最高优先级 - // ═══════════════════════════════════════════════════════════════════ - const factLines = formatFactsForInjection(getFacts(store)); - if (factLines.length) { - const l3Budget = { used: 0, max: Math.min(L3_MAX, total.max - total.used) }; - for (const line of factLines) { - if (!pushWithBudget(assembled.facts.lines, line, l3Budget)) break; - } - assembled.facts.tokens = l3Budget.used; - total.used += l3Budget.used; - injectionStats.facts.count = assembled.facts.lines.length; - injectionStats.facts.tokens = l3Budget.used; + // ═══════════════════════════════════════════════════════════════════════ + // [优先级 1] 世界约束 - 最高优先级(带分层过滤) + // ═══════════════════════════════════════════════════════════════════════ + + const T_L1_Start = performance.now(); + + const allFacts = getFacts(); + const knownCharacters = getKnownCharacters(store); + const factLines = formatFactsForInjection(allFacts, focusEntities, knownCharacters); + + // METRICS: L1 指标 + if (metrics) { + metrics.l1.factsTotal = allFacts.length; + metrics.l1.factsFiltered = allFacts.length - factLines.length; } - // ═══════════════════════════════════════════════════════════════════ - // [优先级 2] 人物弧光 - 预留预算(稍后再拼接到末尾) - // ═══════════════════════════════════════════════════════════════════ + if (factLines.length) { + const l1Budget = { used: 0, max: Math.min(L1_MAX, total.max - total.used) }; + for (const line of factLines) { + if (!pushWithBudget(assembled.facts.lines, line, l1Budget)) break; + } + assembled.facts.tokens = l1Budget.used; + total.used += l1Budget.used; + injectionStats.facts.count = assembled.facts.lines.length; + injectionStats.facts.tokens = l1Budget.used; + injectionStats.facts.filtered = allFacts.length - factLines.length; + + // METRICS + if (metrics) { + metrics.l1.factsInjected = assembled.facts.lines.length; + metrics.l1.tokens = l1Budget.used; + metrics.l1.samples = assembled.facts.lines.slice(0, 3).map(line => + line.length > 60 ? line.slice(0, 60) + '...' : line + ); + metrics.timing.l1Constraints = Math.round(performance.now() - T_L1_Start); + } + } else if (metrics) { + metrics.timing.l1Constraints = Math.round(performance.now() - T_L1_Start); + } + + // ═══════════════════════════════════════════════════════════════════════ + // [优先级 2] 人物弧光 - 预留预算 + // ═══════════════════════════════════════════════════════════════════════ if (data.arcs?.length && total.used < total.max) { const { name1 } = getContext(); const userName = String(name1 || "").trim(); const relevant = new Set( - [userName, ...(queryEntities || [])] + [userName, ...(focusEntities || [])] .map(s => String(s || "").trim()) .filter(Boolean) ); @@ -483,13 +644,21 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities } } - // ═══════════════════════════════════════════════════════════════════ + // ═══════════════════════════════════════════════════════════════════════ // [优先级 3] 事件 + 证据 - // ═══════════════════════════════════════════════════════════════════ + // ═══════════════════════════════════════════════════════════════════════ + const recalledEvents = (recallResult?.events || []).filter(e => e?.event?.summary); const chunks = recallResult?.chunks || []; const usedChunkIds = new Set(); + /** + * 为事件选择最佳证据 chunk + * @param {object} eventObj - 事件对象 + * @returns {object|null} 最佳 chunk + */ + + // 优先 L0 虚拟 chunk,否则按 chunkIdx 选第一个 function pickBestChunkForEvent(eventObj) { const range = parseFloorRange(eventObj?.summary); if (!range) return null; @@ -498,11 +667,27 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities for (const c of chunks) { if (usedChunkIds.has(c.chunkId)) continue; if (c.floor < range.start || c.floor > range.end) continue; - if (!best || (c.similarity || 0) > (best.similarity || 0)) best = c; + + if (!best) { + best = c; + } else if (c.isL0 && !best.isL0) { + // L0 优先 + best = c; + } else if (c.isL0 === best.isL0 && (c.chunkIdx ?? 0) < (best.chunkIdx ?? 0)) { + // 同类型按 chunkIdx 选靠前的 + best = c; + } } return best; - } + } + /** + * 格式化带证据的事件 + * @param {object} e - 事件召回项 + * @param {number} idx - 索引 + * @param {object|null} chunk - 证据 chunk + * @returns {string} + */ function formatEventWithEvidence(e, idx, chunk) { const ev = e.event || {}; const time = ev.timeLabel || ""; @@ -529,11 +714,10 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities return lines.join("\n"); } - // 候选按相似度从高到低(保证高分优先拥有证据) const candidates = [...recalledEvents].sort((a, b) => (b.similarity || 0) - (a.similarity || 0)); - const selectedDirect = []; // { event, text, tokens, chunk, hasEvidence } - const selectedSimilar = []; // { event, text, tokens, chunk, hasEvidence } + const selectedDirect = []; + const selectedSimilar = []; for (let candidateRank = 0; candidateRank < candidates.length; candidateRank++) { const e = candidates[candidateRank]; @@ -544,14 +728,11 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities const bestChunk = pickBestChunkForEvent(e.event); - // 先尝试"带证据" - // idx 先占位写 0,后面统一按时间线重排后再改号 let text = formatEventWithEvidence(e, 0, bestChunk); let cost = estimateTokens(text); let hasEvidence = !!bestChunk; let chosenChunk = bestChunk || null; - // 塞不下就退化成"不带证据" if (total.used + cost > total.max) { text = formatEventWithEvidence(e, 0, null); cost = estimateTokens(text); @@ -563,7 +744,6 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities } } - // 写入 if (isDirect) { selectedDirect.push({ event: e.event, text, tokens: cost, chunk: chosenChunk, hasEvidence, candidateRank }); } else { @@ -573,7 +753,6 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities injectionStats.events.selected++; total.used += cost; - // tokens 拆分记账(事件本体 vs 证据) if (hasEvidence && bestChunk) { const chunkLine = formatChunkFullLine(bestChunk); const ct = estimateTokens(chunkLine); @@ -581,7 +760,6 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities injectionStats.evidence.tokens += ct; usedChunkIds.add(bestChunk.chunkId); - // 事件本体 tokens = cost - ct(粗略但够调试) injectionStats.events.tokens += Math.max(0, cost - ct); } else { injectionStats.events.tokens += cost; @@ -593,14 +771,11 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities hasEvidence, tokens: cost, similarity: e.similarity || 0, + hasL0Evidence: bestChunk?.isL0 || false, }); } - // ═══════════════════════════════════════════════════════════════════ - // 重排:恢复时间线顺序(按楼层/evt 序号升序) - // 并统一重编号(不重新 pick chunk,不重新格式化结构) - // ═══════════════════════════════════════════════════════════════════ - + // 重排 selectedDirect.sort((a, b) => getEventSortKey(a.event) - getEventSortKey(b.event)); selectedSimilar.sort((a, b) => getEventSortKey(a.event) - getEventSortKey(b.event)); @@ -619,14 +794,14 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities assembled.events.direct = selectedDirectTexts; assembled.events.similar = selectedSimilarTexts; - // ═══════════════════════════════════════════════════════════════════ - // [优先级 4] 远期片段(已总结范围的 orphan chunks)- 带上下文配对 - // ═══════════════════════════════════════════════════════════════════ + // ═══════════════════════════════════════════════════════════════════════ + // [优先级 4] 远期片段(已总结范围的 orphan chunks) + // ═══════════════════════════════════════════════════════════════════════ + const lastSummarized = store.lastSummarizedMesId ?? -1; const lastChunkFloor = meta?.lastChunkFloor ?? -1; const keepVisible = store.keepVisibleCount ?? 3; - // 收集需要配对的楼层 const orphanContextFloors = new Set(); const orphanCandidates = chunks .filter(c => !usedChunkIds.has(c.chunkId)) @@ -638,7 +813,6 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities if (pairFloor >= 0) orphanContextFloors.add(pairFloor); } - // 批量获取配对楼层的chunks let contextChunksByFloor = new Map(); if (chatId && orphanContextFloors.size > 0) { try { @@ -663,7 +837,6 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities let contextPairsCount = 0; for (const c of orphans) { - // L0 不需要配对 if (c.isL0) { const line = formatChunkFullLine(c); if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) break; @@ -672,15 +845,12 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities continue; } - // 获取配对chunk const pairFloor = getContextFloor(c); - const candidates = contextChunksByFloor.get(pairFloor) || []; - const contextChunk = pickContextChunk(candidates, c); + const pairCandidates = contextChunksByFloor.get(pairFloor) || []; + const contextChunk = pickContextChunk(pairCandidates, c); - // 格式化(带配对) const formattedLines = formatChunkWithContext(c, contextChunk); - // 尝试添加所有行 let allAdded = true; for (const line of formattedLines) { if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) { @@ -702,11 +872,10 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities injectionStats.orphans.contextPairs = contextPairsCount; } - // ═══════════════════════════════════════════════════════════════════ - // [独立预算] 待整理(未总结范围,独立 5000)- 带上下文配对 - // ═══════════════════════════════════════════════════════════════════ + // ═══════════════════════════════════════════════════════════════════════ + // [独立预算] 待整理(未总结范围) + // ═══════════════════════════════════════════════════════════════════════ - // 近期范围:(lastSummarized, lastChunkFloor - keepVisible] const recentStart = lastSummarized + 1; const recentEnd = lastChunkFloor - keepVisible; @@ -715,7 +884,6 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities .filter(c => !usedChunkIds.has(c.chunkId)) .filter(c => c.floor >= recentStart && c.floor <= recentEnd); - // 收集近期范围需要配对的楼层 const recentContextFloors = new Set(); for (const c of recentOrphanCandidates) { if (c.isL0) continue; @@ -723,10 +891,8 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities if (pairFloor >= 0) recentContextFloors.add(pairFloor); } - // 批量获取(复用已有的 or 新获取) let recentContextChunksByFloor = new Map(); if (chatId && recentContextFloors.size > 0) { - // 过滤掉已经获取过的 const newFloors = Array.from(recentContextFloors).filter(f => !contextChunksByFloor.has(f)); if (newFloors.length > 0) { try { @@ -751,7 +917,6 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities let recentContextPairsCount = 0; for (const c of recentOrphans) { - // L0 不需要配对 if (c.isL0) { const line = formatChunkFullLine(c); if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) break; @@ -759,15 +924,12 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities continue; } - // 获取配对chunk const pairFloor = getContextFloor(c); - const candidates = recentContextChunksByFloor.get(pairFloor) || []; - const contextChunk = pickContextChunk(candidates, c); + const pairCandidates = recentContextChunksByFloor.get(pairFloor) || []; + const contextChunk = pickContextChunk(pairCandidates, c); - // 格式化(带配对) const formattedLines = formatChunkWithContext(c, contextChunk); - // 尝试添加所有行 let allAdded = true; for (const line of formattedLines) { if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) { @@ -788,37 +950,39 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities recentOrphanStats.contextPairs = recentContextPairsCount; } - // ═══════════════════════════════════════════════════════════════════ + // ═══════════════════════════════════════════════════════════════════════ // 按注入顺序拼接 sections - // ═══════════════════════════════════════════════════════════════════ + // ═══════════════════════════════════════════════════════════════════════ + + const T_L4_Start = performance.now(); + const sections = []; - // 1. 世界约束 → 定了的事 + if (assembled.facts.lines.length) { sections.push(`[定了的事] 已确立的事实\n${assembled.facts.lines.join("\n")}`); } - // 2. 核心经历 → 印象深的事 if (assembled.events.direct.length) { sections.push(`[印象深的事] 记得很清楚\n\n${assembled.events.direct.join("\n\n")}`); } - // 3. 过往背景 → 好像有关的事 if (assembled.events.similar.length) { sections.push(`[好像有关的事] 听说过或有点模糊\n\n${assembled.events.similar.join("\n\n")}`); } - // 4. 远期片段 → 更早以前 if (assembled.orphans.lines.length) { sections.push(`[更早以前] 记忆里残留的老画面\n${assembled.orphans.lines.join("\n")}`); } - // 5. 待整理 → 近期 if (assembled.recentOrphans.lines.length) { sections.push(`[近期] 清晰但还没整理\n${assembled.recentOrphans.lines.join("\n")}`); } - // 6. 人物弧光 → 这些人 if (assembled.arcs.lines.length) { sections.push(`[这些人] 他们的弧光\n${assembled.arcs.lines.join("\n")}`); } if (!sections.length) { - return { promptText: "", injectionLogText: "", injectionStats }; + if (metrics) { + metrics.timing.l3Assembly = Math.round(performance.now() - T_Start - (metrics.timing.l1Constraints || 0)); + metrics.timing.l4Formatting = 0; + } + return { promptText: "", injectionLogText: "", injectionStats, metrics }; } const promptText = @@ -826,17 +990,69 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities `<剧情记忆>\n\n${sections.join("\n\n")}\n\n\n` + `${buildPostscript()}`; - // ★ 修复:先写回预算统计,再生成日志 - injectionStats.budget.used = total.used + (assembled.recentOrphans.tokens || 0); - const injectionLogText = formatInjectionLog(injectionStats, details, recentOrphanStats); + // METRICS: 更新 L4 和 Budget 指标 + if (metrics) { + // L4 指标 + metrics.l4.sectionsIncluded = []; + if (assembled.facts.lines.length) metrics.l4.sectionsIncluded.push('constraints'); + if (assembled.events.direct.length) metrics.l4.sectionsIncluded.push('direct_events'); + if (assembled.events.similar.length) metrics.l4.sectionsIncluded.push('similar_events'); + if (assembled.orphans.lines.length) metrics.l4.sectionsIncluded.push('orphans'); + if (assembled.recentOrphans.lines.length) metrics.l4.sectionsIncluded.push('recent_orphans'); + if (assembled.arcs.lines.length) metrics.l4.sectionsIncluded.push('arcs'); - return { promptText, injectionLogText, injectionStats }; + metrics.l4.formattingTime = Math.round(performance.now() - T_L4_Start); + metrics.timing.l4Formatting = metrics.l4.formattingTime; + + // Budget 指标 + metrics.budget.total = total.used + (assembled.recentOrphans.tokens || 0); + metrics.budget.limit = TOTAL_BUDGET_MAX; + metrics.budget.utilization = Math.round(metrics.budget.total / TOTAL_BUDGET_MAX * 100); + metrics.budget.breakdown = { + constraints: assembled.facts.tokens, + events: injectionStats.events.tokens + injectionStats.evidence.tokens, + chunks: injectionStats.orphans.tokens, + recentOrphans: recentOrphanStats.tokens || 0, + arcs: assembled.arcs.tokens, + }; + + // L3 额外指标 + metrics.l3.tokens = injectionStats.orphans.tokens + (recentOrphanStats.tokens || 0); + metrics.l3.contextPairsAdded = injectionStats.orphans.contextPairs + recentOrphanStats.contextPairs; + metrics.l3.assemblyTime = Math.round(performance.now() - T_Start - (metrics.timing.l1Constraints || 0) - metrics.l4.formattingTime); + metrics.timing.l3Assembly = metrics.l3.assemblyTime; + + // 质量指标 + const totalFacts = allFacts.length; + metrics.quality.constraintCoverage = totalFacts > 0 + ? Math.round(assembled.facts.lines.length / totalFacts * 100) + : 100; + metrics.quality.eventPrecisionProxy = metrics.l2?.similarityDistribution?.mean || 0; + + const totalChunks = metrics.l3.chunksSelected || 0; + const chunksWithEvents = injectionStats.evidence.attached; + metrics.quality.evidenceDensity = totalChunks > 0 + ? Math.round(chunksWithEvents / totalChunks * 100) + : 0; + + // 检测问题 + metrics.quality.potentialIssues = detectIssues(metrics); + } + + return { promptText, injectionLogText: "", injectionStats, metrics }; } // ───────────────────────────────────────────────────────────────────────────── -// 因果证据补充(给 causalEvents 挂 evidence chunk) +// 因果证据补充 // ───────────────────────────────────────────────────────────────────────────── +/** + * 为因果事件附加证据 chunk + * @param {Array} causalEvents - 因果事件列表 + * @param {Map} eventVectorMap - 事件向量映射 + * @param {Map} chunkVectorMap - chunk 向量映射 + * @param {Map} chunksMap - chunk 映射 + */ async function attachEvidenceToCausalEvents(causalEvents, eventVectorMap, chunkVectorMap, chunksMap) { for (const c of causalEvents) { c._evidenceChunk = null; @@ -881,11 +1097,18 @@ async function attachEvidenceToCausalEvents(causalEvents, eventVectorMap, chunkV } // ───────────────────────────────────────────────────────────────────────────── -// ✅ 向量模式:召回 + 注入(供 story-summary.js 在 GENERATION_STARTED 调用) +// 向量模式:召回 + 注入 // ───────────────────────────────────────────────────────────────────────────── +/** + * 构建向量模式的注入文本 + * @param {boolean} excludeLastAi - 是否排除最后一条 AI 消息 + * @param {object} hooks - 钩子 {postToFrame, echo, pendingUserMessage} + * @returns {Promise} {text, logText} + */ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) { const { postToFrame = null, echo = null, pendingUserMessage = null } = hooks; + if (!getSettings().storySummary?.enabled) { return { text: "", logText: "" }; } @@ -911,7 +1134,6 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) { } const { chatId } = getContext(); - // meta 用于 lastChunkFloor(供 buildVectorPrompt 分桶) const meta = chatId ? await getMeta(chatId) : null; let recallResult = null; @@ -929,14 +1151,14 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) { events: recallResult?.events || [], chunks: recallResult?.chunks || [], causalEvents: recallResult?.causalEvents || [], - queryEntities: recallResult?.queryEntities || [], + focusEntities: recallResult?.focusEntities || [], logText: recallResult?.logText || "", + metrics: recallResult?.metrics || null, }; - // 给因果事件挂证据(用于因果行展示) + // 给因果事件挂证据 const causalEvents = recallResult.causalEvents || []; if (causalEvents.length > 0) { - const { chatId } = getContext(); if (chatId) { try { const floors = new Set(); @@ -946,13 +1168,13 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) { for (let f = r.start; f <= r.end; f++) floors.add(f); } - const [chunks, chunkVecs, eventVecs] = await Promise.all([ + const [chunksList, chunkVecs, eventVecs] = await Promise.all([ getChunksByFloors(chatId, Array.from(floors)), getAllChunkVectors(chatId), getAllEventVectors(chatId), ]); - const chunksMap = new Map(chunks.map(c => [c.chunkId, c])); + const chunksMap = new Map(chunksList.map(c => [c.chunkId, c])); const chunkVectorMap = new Map(chunkVecs.map(v => [v.chunkId, v.vector])); const eventVectorMap = new Map(eventVecs.map(v => [v.eventId, v.vector])); @@ -971,13 +1193,11 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) { } catch (e) { xbLog.error(MODULE_ID, "向量召回失败", e); - // 显式提示(节流) if (echo && canNotifyRecallFail()) { const msg = String(e?.message || "未知错误").replace(/\s+/g, " ").slice(0, 200); await echo(`/echo severity=warning 向量召回失败:${msg}`); } - // iframe 日志也写一份 if (postToFrame) { postToFrame({ type: "RECALL_LOG", @@ -988,7 +1208,6 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) { return { text: "", logText: `\n[Vector Recall Failed]\n${String(e?.stack || e?.message || e)}\n` }; } - // 成功但结果为空:也提示,并清空注入(不降级) const hasUseful = (recallResult?.events?.length || 0) > 0 || (recallResult?.chunks?.length || 0) > 0 || @@ -1009,27 +1228,29 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) { return { text: "", logText: "\n[Vector Recall Empty]\nNo recall candidates / vectors not ready.\n" }; } - // 拼装向量 prompt - const { promptText, injectionLogText } = await buildVectorPrompt( + // 拼装向量 prompt,传入 focusEntities 和 metrics + const { promptText, metrics: promptMetrics } = await buildVectorPrompt( store, recallResult, causalById, - recallResult?.queryEntities || [], - meta + recallResult?.focusEntities || [], + meta, + recallResult?.metrics || null ); - // wrapper(沿用面板设置)——必须补回,否则语义回退 + // wrapper const cfg = getSummaryPanelConfig(); let finalText = String(promptText || ""); if (cfg.trigger?.wrapperHead) finalText = cfg.trigger.wrapperHead + "\n" + finalText; if (cfg.trigger?.wrapperTail) finalText = finalText + "\n" + cfg.trigger.wrapperTail; - // 发给涌现窗口:召回报告 + 装配报告 + // METRICS: 生成完整的指标日志 + const metricsLogText = promptMetrics ? formatMetricsLog(promptMetrics) : ''; + + // 发给 iframe if (postToFrame) { - const recallLog = recallResult.logText || ""; - postToFrame({ type: "RECALL_LOG", text: recallLog + (injectionLogText || "") }); + postToFrame({ type: "RECALL_LOG", text: metricsLogText }); } - return { text: finalText, logText: (recallResult.logText || "") + (injectionLogText || "") }; + return { text: finalText, logText: metricsLogText }; } - diff --git a/modules/story-summary/story-summary-ui.js b/modules/story-summary/story-summary-ui.js index 53551aa..c7aa079 100644 --- a/modules/story-summary/story-summary-ui.js +++ b/modules/story-summary/story-summary-ui.js @@ -184,7 +184,7 @@ renderFilterRules(cfg?.textFilterRules || DEFAULT_FILTER_RULES); } - // ═══════════════════════════════════════════════════════════════════════════ + // ═══════════════════════════════════════════════════════════════════════════ // Filter Rules UI // ═══════════════════════════════════════════════════════════════════════════ @@ -257,7 +257,7 @@ } function updateVectorStats(stats) { - $('vector-atom-count').textContent = stats.stateAtoms || 0; + $('vector-atom-count').textContent = stats.stateVectors || 0; $('vector-chunk-count').textContent = stats.chunkCount || 0; $('vector-event-count').textContent = stats.eventVectors || 0; } @@ -276,19 +276,36 @@ const pending = stats.pending || 0; const empty = stats.empty || 0; const fail = stats.fail || 0; + const atomsCount = stats.atomsCount || 0; $('anchor-extracted').textContent = extracted; $('anchor-total').textContent = total; $('anchor-pending').textContent = pending; - - const extra = document.getElementById('anchor-extra'); - if (extra) extra.textContent = `空 ${empty} · 失败 ${fail}`; + $('anchor-atoms-count').textContent = atomsCount; const pendingWrap = $('anchor-pending-wrap'); if (pendingWrap) { pendingWrap.classList.toggle('hidden', pending === 0); } + // 显示 empty/fail 信息 + const extraWrap = $('anchor-extra-wrap'); + const extraSep = $('anchor-extra-sep'); + const extra = $('anchor-extra'); + if (extraWrap && extra) { + if (empty > 0 || fail > 0) { + const parts = []; + if (empty > 0) parts.push(`空 ${empty}`); + if (fail > 0) parts.push(`失败 ${fail}`); + extra.textContent = parts.join(' · '); + extraWrap.style.display = ''; + if (extraSep) extraSep.style.display = ''; + } else { + extraWrap.style.display = 'none'; + if (extraSep) extraSep.style.display = 'none'; + } + } + const emptyWarning = $('vector-empty-l0-warning'); if (emptyWarning) { emptyWarning.classList.toggle('hidden', extracted > 0); @@ -337,7 +354,7 @@ }; } -function initVectorUI() { + function initVectorUI() { $('vector-enabled').onchange = e => { $('vector-config-area').classList.toggle('hidden', !e.target.checked); }; @@ -966,6 +983,7 @@ function initVectorUI() { }; } + function setRecallLog(text) { lastRecallLogText = text || ''; updateRecallLogDisplay(); @@ -974,14 +992,27 @@ function initVectorUI() { function updateRecallLogDisplay() { const content = $('recall-log-content'); if (!content) return; + if (lastRecallLogText) { content.textContent = lastRecallLogText; content.classList.remove('recall-empty'); } else { - setHtml(content, '
暂无召回日志

当 AI 生成回复时,系统会自动进行记忆召回。
召回日志将显示:
• 查询文本
• L1 片段匹配结果
• L2 事件召回详情
• 耗时统计
'); + setHtml(content, `
+ 暂无召回日志

+ 当 AI 生成回复时,系统会自动进行记忆召回。

+ 召回日志将显示:
+ • [L0] Query Understanding - 意图识别
+ • [L1] Constraints - 硬约束注入
+ • [L2] Narrative Retrieval - 事件召回
+ • [L3] Evidence Assembly - 证据装配
+ • [L4] Prompt Formatting - 格式化
+ • [Budget] Token 预算使用情况
+ • [Quality] 质量指标与潜在问题 +
`); } } + // ═══════════════════════════════════════════════════════════════════════════ // Editor // ═══════════════════════════════════════════════════════════════════════════ @@ -1117,7 +1148,7 @@ function initVectorUI() { }; } - function openEditor(section) { + function openEditor(section) { currentEditSection = section; const meta = SECTION_META[section]; const es = $('editor-struct'); @@ -1368,10 +1399,10 @@ function initVectorUI() { btnCancel.classList.remove('hidden'); btnClear.classList.add('hidden'); - const percent = d.total > 0 ? Math.round(d.current / d.total * 100) : 0; - progress.querySelector('.progress-inner').style.width = percent + '%'; - const displayText = d.message || `${d.phase || ''}: ${d.current}/${d.total}`; - progress.querySelector('.progress-text').textContent = displayText; + const percent = d.total > 0 ? Math.round(d.current / d.total * 100) : 0; + progress.querySelector('.progress-inner').style.width = percent + '%'; + const displayText = d.message || `${d.phase || ''}: ${d.current}/${d.total}`; + progress.querySelector('.progress-text').textContent = displayText; } break; } diff --git a/modules/story-summary/story-summary.css b/modules/story-summary/story-summary.css index a2f5c07..3c3adb2 100644 --- a/modules/story-summary/story-summary.css +++ b/modules/story-summary/story-summary.css @@ -2873,3 +2873,28 @@ h1 span { padding: 6px 10px; } } + +/* ═══════════════════════════════════════════════════════════════════════════ + Metrics Log Styling + ═══════════════════════════════════════════════════════════════════════════ */ + +#recall-log-content { + font-family: 'SF Mono', Monaco, Consolas, 'Courier New', monospace; + font-size: 11px; + line-height: 1.5; + white-space: pre; + overflow-x: auto; + tab-size: 4; +} + +#recall-log-content .metric-warn { + color: #f59e0b; +} + +#recall-log-content .metric-error { + color: #ef4444; +} + +#recall-log-content .metric-good { + color: #22c55e; +} \ No newline at end of file diff --git a/modules/story-summary/story-summary.html b/modules/story-summary/story-summary.html index 4de2c26..23fc7ac 100644 --- a/modules/story-summary/story-summary.html +++ b/modules/story-summary/story-summary.html @@ -116,7 +116,6 @@
选择角色
-
暂无角色
@@ -425,6 +424,16 @@
(待提取 0 楼)
+ · +
+ L0 Atoms: + 0 + 条 +
+ + @@ -452,7 +461,7 @@
- L0 Atoms: + L0 Vectors: 0
@@ -571,4 +580,4 @@ - + \ No newline at end of file diff --git a/modules/story-summary/story-summary.js b/modules/story-summary/story-summary.js index dd82b9b..4e7fd9e 100644 --- a/modules/story-summary/story-summary.js +++ b/modules/story-summary/story-summary.js @@ -90,7 +90,7 @@ import { exportVectors, importVectors } from "./vector/storage/vector-io.js"; const MODULE_ID = "storySummary"; const SUMMARY_CONFIG_KEY = "storySummaryPanelConfig"; const iframePath = `${extensionFolderPath}/modules/story-summary/story-summary.html`; -const VALID_SECTIONS = ["keywords", "events", "characters", "arcs", "world"]; +const VALID_SECTIONS = ["keywords", "events", "characters", "arcs", "facts"]; const MESSAGE_EVENT = "message"; // ═══════════════════════════════════════════════════════════════════════════ @@ -236,7 +236,6 @@ async function sendVectorStatsToFrame() { const stats = await getStorageStats(chatId); const chunkStatus = await getChunkBuildStatus(); const totalMessages = chat?.length || 0; - const stateAtomsCount = getStateAtomsCount(); const stateVectorsCount = await getStateVectorsCount(chatId); const cfg = getVectorConfig(); @@ -256,7 +255,6 @@ async function sendVectorStatsToFrame() { builtFloors: chunkStatus.builtFloors, totalFloors: chunkStatus.totalFloors, totalMessages, - stateAtoms: stateAtomsCount, stateVectors: stateVectorsCount, }, mismatch, @@ -265,7 +263,8 @@ async function sendVectorStatsToFrame() { async function sendAnchorStatsToFrame() { const stats = await getAnchorStats(); - postToFrame({ type: "ANCHOR_STATS", stats }); + const atomsCount = getStateAtomsCount(); + postToFrame({ type: "ANCHOR_STATS", stats: { ...stats, atomsCount } }); } async function handleAnchorGenerate() { @@ -290,10 +289,15 @@ async function handleAnchorGenerate() { postToFrame({ type: "ANCHOR_GEN_PROGRESS", current: 0, total: 1, message: "分析中..." }); try { + // Phase 1: L0 提取 + Phase 2: L0 向量化(在 incrementalExtractAtoms 内部完成) await incrementalExtractAtoms(chatId, chat, (message, current, total) => { postToFrame({ type: "ANCHOR_GEN_PROGRESS", current, total, message }); }); + // Phase 3: 处理 pending L1 Chunks + postToFrame({ type: "ANCHOR_GEN_PROGRESS", current: 0, total: 1, message: "向量化 L1..." }); + await buildIncrementalChunks({ vectorConfig: vectorCfg }); + await sendAnchorStatsToFrame(); await sendVectorStatsToFrame(); @@ -1212,9 +1216,11 @@ async function handleChatChanged() { if (frameReady) { await sendFrameBaseData(store, newLength); sendFrameFullData(store, newLength); + + sendAnchorStatsToFrame(); + sendVectorStatsToFrame(); } - // 检测向量完整性并提醒(仅提醒,不自动操作) setTimeout(() => checkVectorIntegrityAndWarn(), 2000); } diff --git a/modules/story-summary/vector/llm/atom-extraction.js b/modules/story-summary/vector/llm/atom-extraction.js index 32826b1..ce2f710 100644 --- a/modules/story-summary/vector/llm/atom-extraction.js +++ b/modules/story-summary/vector/llm/atom-extraction.js @@ -1,5 +1,5 @@ -// ============================================================================ -// atom-extraction.js - 30并发 + 首批错开 + 取消支持 + 进度回调 +// ============================================================================ +// atom-extraction.js - L0 叙事锚点提取(三层 themes 版) // ============================================================================ import { callLLM, parseJson } from './llm-service.js'; @@ -12,7 +12,7 @@ const CONCURRENCY = 10; const RETRY_COUNT = 2; const RETRY_DELAY = 500; const DEFAULT_TIMEOUT = 20000; -const STAGGER_DELAY = 80; // 首批错开延迟(ms) +const STAGGER_DELAY = 80; let batchCancelled = false; @@ -24,49 +24,150 @@ export function isBatchCancelled() { return batchCancelled; } -const SYSTEM_PROMPT = `你是叙事锚点提取器。从一轮对话(用户发言+角色回复)中提取4-8个关键锚点。 +// ============================================================================ +// L0 提取 Prompt(三层 themes) +// ============================================================================ + +const SYSTEM_PROMPT = `你是叙事锚点提取器。从一轮对话中提取4-8个关键锚点,用于后续语义检索。 输入格式: - ... - ... + ... + ... -只输出严格JSON(不要解释,不要前后多余文字): -{"atoms":[{"t":"类型","s":"主体","v":"值","f":"来源"}]} +只输出严格JSON: +{"atoms":[{"t":"类型","s":"主体","o":"客体","v":"谓词","l":"地点","f":"来源","th":{"fn":[],"pt":[],"kw":[]}}]} -类型(t): -- emo: 情绪状态(需要s主体) -- loc: 地点/场景 -- act: 关键动作(需要s主体) -- rev: 揭示/发现 -- ten: 冲突/张力 -- dec: 决定/承诺 +## 类型(t) +- emo: 情绪状态变化 +- act: 关键动作/行为 +- rev: 揭示/发现/真相 +- dec: 决定/承诺/宣言 +- ten: 冲突/张力/对立 +- loc: 场景/地点变化 + +## 字段说明 +- s: 主体(必填) +- o: 客体(可空) +- v: 谓词,15字内(必填) +- l: 地点(可空) +- f: "u"=用户 / "a"=角色(必填) +- th: 主题标签(必填,结构化对象) + +## th 三层结构 +fn(叙事功能)1-2个,枚举: + establish=建立设定 | escalate=升级加剧 | reveal=揭示发现 | challenge=挑战试探 + commit=承诺锁定 | conflict=冲突对抗 | resolve=解决收束 | transform=转变逆转 + bond=连接羁绊 | break=断裂破坏 + +pt(互动模式)1-3个,枚举: + power_down=上对下 | power_up=下对上 | power_equal=对等 | power_contest=争夺 + asymmetric=信息不对称 | witnessed=有观众 | secluded=隔绝私密 + ritual=仪式正式 | routine=日常惯例 | triangular=三方介入 + +kw(具体关键词)1-3个,自由格式 + +## 示例输出 +{"atoms":[ + {"t":"act","s":"艾拉","o":"古龙","v":"用圣剑刺穿心脏","l":"火山口","f":"a", + "th":{"fn":["commit"],"pt":["power_down","ritual"],"kw":["战斗","牺牲"]}}, + {"t":"emo","s":"林夏","o":"陆远","v":"意识到自己喜欢他","l":"","f":"a", + "th":{"fn":["reveal","escalate"],"pt":["asymmetric","secluded"],"kw":["心动","暗恋"]}}, + {"t":"dec","s":"凯尔","o":"王国","v":"放弃王位继承权","l":"王座厅","f":"a", + "th":{"fn":["commit","break"],"pt":["ritual","witnessed"],"kw":["抉择","自由"]}}, + {"t":"rev","s":"","o":"","v":"管家其实是间谍","l":"","f":"a", + "th":{"fn":["reveal"],"pt":["asymmetric"],"kw":["背叛","真相"]}}, + {"t":"ten","s":"兄弟二人","o":"","v":"为遗产反目","l":"","f":"a", + "th":{"fn":["conflict","break"],"pt":["power_contest"],"kw":["冲突","亲情破裂"]}} +]} 规则: -- s: 主体(谁) -- v: 简洁值,10字内 -- f: "u"=用户发言中, "a"=角色回复中 - 只提取对未来检索有价值的锚点 -- 无明显锚点返回空数组`; +- fn 回答"这在故事里推动了什么" +- pt 回答"这是什么结构的互动" +- kw 用于细粒度检索 +- 无明显锚点时返回 {"atoms":[]}`; + +const JSON_PREFILL = '{"atoms":['; + +// ============================================================================ +// Semantic 构建 +// ============================================================================ function buildSemantic(atom, userName, aiName) { - const speaker = atom.f === 'u' ? userName : aiName; - const s = atom.s || speaker; + const type = atom.t || 'act'; + const subject = atom.s || (atom.f === 'u' ? userName : aiName); + const object = atom.o || ''; + const verb = atom.v || ''; + const location = atom.l || ''; + + // 三层 themes 合并 + const th = atom.th || {}; + const tags = [ + ...(Array.isArray(th.fn) ? th.fn : []), + ...(Array.isArray(th.pt) ? th.pt : []), + ...(Array.isArray(th.kw) ? th.kw : []), + ].filter(Boolean); - switch (atom.t) { - case 'emo': return `${s}感到${atom.v}`; - case 'loc': return `场景:${atom.v}`; - case 'act': return `${s}${atom.v}`; - case 'rev': return `揭示:${atom.v}`; - case 'ten': return `冲突:${atom.v}`; - case 'dec': return `${s}决定${atom.v}`; - default: return `${s} ${atom.v}`; + const typePart = `<${type}>`; + const themePart = tags.length > 0 ? ` [${tags.join('/')}]` : ''; + const locPart = location ? ` 在${location}` : ''; + const objPart = object ? ` -> ${object}` : ''; + + let semantic = ''; + switch (type) { + case 'emo': + semantic = object + ? `${typePart} ${subject} -> ${verb} (对${object})${locPart}` + : `${typePart} ${subject} -> ${verb}${locPart}`; + break; + + case 'act': + semantic = `${typePart} ${subject} -> ${verb}${objPart}${locPart}`; + break; + + case 'rev': + semantic = object + ? `${typePart} 揭示: ${verb} (关于${object})${locPart}` + : `${typePart} 揭示: ${verb}${locPart}`; + break; + + case 'dec': + semantic = object + ? `${typePart} ${subject} -> ${verb} (对${object})${locPart}` + : `${typePart} ${subject} -> ${verb}${locPart}`; + break; + + case 'ten': + semantic = object + ? `${typePart} ${subject} <-> ${object}: ${verb}${locPart}` + : `${typePart} ${subject}: ${verb}${locPart}`; + break; + + case 'loc': + semantic = location + ? `${typePart} 场景: ${location} - ${verb}` + : `${typePart} 场景: ${verb}`; + break; + + default: + semantic = `${typePart} ${subject} -> ${verb}${objPart}${locPart}`; } + + return semantic + themePart; } +// ============================================================================ +// 睡眠工具 +// ============================================================================ + const sleep = (ms) => new Promise(r => setTimeout(r, ms)); +// ============================================================================ +// 单轮提取(带重试) +// ============================================================================ + async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, options = {}) { const { timeout = DEFAULT_TIMEOUT } = options; @@ -86,8 +187,6 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op const input = `\n${parts.join('\n')}\n`; - xbLog.info(MODULE_ID, `floor ${aiFloor} 发送输入 len=${input.length}`); - for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) { if (batchCancelled) return []; @@ -95,16 +194,15 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op const response = await callLLM([ { role: 'system', content: SYSTEM_PROMPT }, { role: 'user', content: input }, - { role: 'assistant', content: '收到,开始提取并仅输出 JSON。' }, + { role: 'assistant', content: JSON_PREFILL }, ], { temperature: 0.2, - max_tokens: 500, + max_tokens: 1000, timeout, }); const rawText = String(response || ''); if (!rawText.trim()) { - xbLog.warn(MODULE_ID, `floor ${aiFloor} 解析失败:响应为空`); if (attempt < RETRY_COUNT) { await sleep(RETRY_DELAY); continue; @@ -112,11 +210,13 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op return null; } + const fullJson = JSON_PREFILL + rawText; + let parsed; try { - parsed = parseJson(rawText); + parsed = parseJson(fullJson); } catch (e) { - xbLog.warn(MODULE_ID, `floor ${aiFloor} 解析失败:JSON 异常`); + xbLog.warn(MODULE_ID, `floor ${aiFloor} JSON解析失败`); if (attempt < RETRY_COUNT) { await sleep(RETRY_DELAY); continue; @@ -125,8 +225,6 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op } if (!parsed?.atoms || !Array.isArray(parsed.atoms)) { - xbLog.warn(MODULE_ID, `floor ${aiFloor} atoms 缺失,raw="${rawText.slice(0, 300)}"`); - xbLog.warn(MODULE_ID, `floor ${aiFloor} 解析失败:atoms 缺失`); if (attempt < RETRY_COUNT) { await sleep(RETRY_DELAY); continue; @@ -141,20 +239,20 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op floor: aiFloor, type: a.t, subject: a.s || null, - value: String(a.v).slice(0, 30), + object: a.o || null, + value: String(a.v).slice(0, 50), + location: a.l || null, source: a.f === 'u' ? 'user' : 'ai', + themes: a.th || { fn: [], pt: [], kw: [] }, semantic: buildSemantic(a, userName, aiName), })); - if (!filtered.length) { - xbLog.warn(MODULE_ID, `floor ${aiFloor} atoms 为空,raw="${rawText.slice(0, 300)}"`); - } + return filtered; } catch (e) { if (batchCancelled) return null; if (attempt < RETRY_COUNT) { - xbLog.warn(MODULE_ID, `floor ${aiFloor} 第${attempt + 1}次失败,重试...`, e?.message); await sleep(RETRY_DELAY * (attempt + 1)); continue; } @@ -166,18 +264,14 @@ async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, op return null; } -/** - * 单轮配对提取(增量时使用) - */ export async function extractAtomsForRound(userMessage, aiMessage, aiFloor, options = {}) { return extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, options); } -/** - * 批量提取(首批 staggered 启动) - * @param {Array} chat - * @param {Function} onProgress - (current, total, failed) => void - */ +// ============================================================================ +// 批量提取 +// ============================================================================ + export async function batchExtractAtoms(chat, onProgress) { if (!chat?.length) return []; @@ -198,14 +292,10 @@ export async function batchExtractAtoms(chat, onProgress) { let failed = 0; for (let i = 0; i < pairs.length; i += CONCURRENCY) { - if (batchCancelled) { - xbLog.info(MODULE_ID, `批量提取已取消 (${completed}/${pairs.length})`); - break; - } + if (batchCancelled) break; const batch = pairs.slice(i, i + CONCURRENCY); - // ★ 首批 staggered 启动:错开 80ms 发送 if (i === 0) { const promises = batch.map((pair, idx) => (async () => { await sleep(idx * STAGGER_DELAY); @@ -213,10 +303,15 @@ export async function batchExtractAtoms(chat, onProgress) { if (batchCancelled) return; try { - const atoms = await extractAtomsForRoundWithRetry(pair.userMsg, pair.aiMsg, pair.aiFloor, { timeout: DEFAULT_TIMEOUT }); + const atoms = await extractAtomsForRoundWithRetry( + pair.userMsg, + pair.aiMsg, + pair.aiFloor, + { timeout: DEFAULT_TIMEOUT } + ); if (atoms?.length) { allAtoms.push(...atoms); - } else { + } else if (atoms === null) { failed++; } } catch { @@ -227,14 +322,18 @@ export async function batchExtractAtoms(chat, onProgress) { })()); await Promise.all(promises); } else { - // 后续批次正常并行 const promises = batch.map(pair => - extractAtomsForRoundWithRetry(pair.userMsg, pair.aiMsg, pair.aiFloor, { timeout: DEFAULT_TIMEOUT }) + extractAtomsForRoundWithRetry( + pair.userMsg, + pair.aiMsg, + pair.aiFloor, + { timeout: DEFAULT_TIMEOUT } + ) .then(atoms => { if (batchCancelled) return; if (atoms?.length) { allAtoms.push(...atoms); - } else { + } else if (atoms === null) { failed++; } completed++; @@ -251,14 +350,12 @@ export async function batchExtractAtoms(chat, onProgress) { await Promise.all(promises); } - // 批次间隔 if (i + CONCURRENCY < pairs.length && !batchCancelled) { await sleep(30); } } - const status = batchCancelled ? '已取消' : '完成'; - xbLog.info(MODULE_ID, `批量提取${status}: ${allAtoms.length} atoms, ${completed}/${pairs.length}, ${failed} 失败`); + xbLog.info(MODULE_ID, `批量提取完成: ${allAtoms.length} atoms, ${failed} 失败`); return allAtoms; } diff --git a/modules/story-summary/vector/llm/llm-service.js b/modules/story-summary/vector/llm/llm-service.js index a870b0d..37f2357 100644 --- a/modules/story-summary/vector/llm/llm-service.js +++ b/modules/story-summary/vector/llm/llm-service.js @@ -1,14 +1,13 @@ -// ═══════════════════════════════════════════════════════════════════════════ -// vector/llm/llm-service.js +// ═══════════════════════════════════════════════════════════════════════════ +// vector/llm/llm-service.js - 修复 prefill 传递方式 // ═══════════════════════════════════════════════════════════════════════════ import { xbLog } from '../../../../core/debug-core.js'; import { getVectorConfig } from '../../data/config.js'; const MODULE_ID = 'vector-llm-service'; -const SILICONFLOW_API_URL = 'https://api.siliconflow.cn'; +const SILICONFLOW_API_URL = 'https://api.siliconflow.cn/v1'; const DEFAULT_L0_MODEL = 'Qwen/Qwen3-8B'; -// 唯一 ID 计数器 let callCounter = 0; function getStreamingModule() { @@ -30,6 +29,7 @@ function b64UrlEncode(str) { /** * 统一LLM调用 - 走酒馆后端(非流式) + * 修复:assistant prefill 用 bottomassistant 参数传递 */ export async function callLLM(messages, options = {}) { const { @@ -46,9 +46,16 @@ export async function callLLM(messages, options = {}) { throw new Error('L0 requires siliconflow API key'); } - const top64 = b64UrlEncode(JSON.stringify(messages)); + // ★ 关键修复:分离 assistant prefill + let topMessages = [...messages]; + let assistantPrefill = ''; + + if (topMessages.length > 0 && topMessages[topMessages.length - 1]?.role === 'assistant') { + const lastMsg = topMessages.pop(); + assistantPrefill = lastMsg.content || ''; + } - // 每次调用用唯一 ID,避免 session 冲突 + const top64 = b64UrlEncode(JSON.stringify(topMessages)); const uniqueId = generateUniqueId('l0'); const args = { @@ -64,8 +71,12 @@ export async function callLLM(messages, options = {}) { model: DEFAULT_L0_MODEL, }; + // ★ 用 bottomassistant 参数传递 prefill + if (assistantPrefill) { + args.bottomassistant = assistantPrefill; + } + try { - // 非流式直接返回结果 const result = await mod.xbgenrawCommand(args, ''); return String(result ?? ''); } catch (e) { diff --git a/modules/story-summary/vector/llm/query-expansion.js b/modules/story-summary/vector/llm/query-expansion.js index 2a8ea32..f4911b1 100644 --- a/modules/story-summary/vector/llm/query-expansion.js +++ b/modules/story-summary/vector/llm/query-expansion.js @@ -1,52 +1,228 @@ -// ═══════════════════════════════════════════════════════════════════════════ -// query-expansion.js - 完整输入,不截断 -// ═══════════════════════════════════════════════════════════════════════════ +// ============================================================================ +// query-expansion.js - 检索查询生成器(三层 themes 版) +// ============================================================================ import { callLLM, parseJson } from './llm-service.js'; import { xbLog } from '../../../../core/debug-core.js'; import { filterText } from '../utils/text-filter.js'; +import { getContext } from '../../../../../../../extensions.js'; +import { getSummaryStore } from '../../data/store.js'; const MODULE_ID = 'query-expansion'; const SESSION_ID = 'xb6'; -const SYSTEM_PROMPT = `你是检索词生成器。根据最近对话,输出用于检索历史剧情的关键词。 +// ============================================================================ +// 系统提示词 +// ============================================================================ -只输出JSON: -{"e":["显式人物/地名"],"i":["隐含人物/情绪/话题"],"q":["检索短句"]} +const SYSTEM_PROMPT = `你是检索查询生成器。根据当前对话上下文,生成用于检索历史剧情的查询语句。 -规则: -- e: 对话中明确提到的人名/地名,1-4个 -- i: 推断出的相关人物/情绪/话题,1-5个 -- q: 用于向量检索的短句,2-3个,每个15字内 -- 关注:正在讨论什么、涉及谁、情绪氛围`; +## 输出格式(严格JSON) +{ + "focus": ["焦点人物"], + "fn": ["叙事功能"], + "pt": ["互动模式"], + "kw": ["关键词"], + "queries": ["DSL查询语句"] +} -/** - * Query Expansion - * @param {Array} messages - 完整消息数组(最后2-3轮) - */ -export async function expandQuery(messages, options = {}) { - const { timeout = 6000 } = options; +## fn(叙事功能)枚举 +establish=建立设定 | escalate=升级加剧 | reveal=揭示发现 | challenge=挑战试探 +commit=承诺锁定 | conflict=冲突对抗 | resolve=解决收束 | transform=转变逆转 +bond=连接羁绊 | break=断裂破坏 - if (!messages?.length) { - return { entities: [], implicit: [], queries: [] }; +## pt(互动模式)枚举 +power_down=上对下 | power_up=下对上 | power_equal=对等 | power_contest=争夺 +asymmetric=信息不对称 | witnessed=有观众 | secluded=隔绝私密 +ritual=仪式正式 | routine=日常惯例 | triangular=三方介入 + +## DSL 查询格式 +- 主体 -> 动作 (-> 客体)? (在地点)? +- 主体 -> 情绪 (对客体)? +- 主体 -> 决定/承诺 (对客体)? +- 揭示: 内容 (关于客体)? +- 主体A <-> 主体B: 冲突内容 +- 场景: 地点/状态 + +## 规则 +- focus: 核心人物,1-4个 +- fn: 当前对话涉及的叙事功能,1-3个 +- pt: 当前对话涉及的互动模式,1-3个 +- kw: 具体关键词,1-4个 +- queries: 2-4条 DSL 查询 + +## 示例 + +输入:艾拉说"那把剑...我记得它的重量,在火山口的时候" +输出: +{ + "focus": ["艾拉", "古龙"], + "fn": ["commit", "bond"], + "pt": ["power_down", "ritual"], + "kw": ["圣剑", "战斗", "火山口"], + "queries": [ + " 艾拉 -> 战斗/使用圣剑 -> 古龙 [commit/power_down]", + " 场景: 火山口 [ritual]", + " 艾拉 -> 牺牲/决绝 [commit]" + ] +}`; + +// ============================================================================ +// 上下文构建 +// ============================================================================ + +function getCharacterContext() { + const context = getContext(); + const char = context.characters?.[context.characterId]; + + if (!char) { + return { name: '', description: '', personality: '' }; } - // 完整格式化,不截断 - const input = messages.map(m => { - const speaker = m.is_user ? '用户' : (m.name || '角色'); - const text = filterText(m.mes || '').trim(); - return `【${speaker}】\n${text}`; - }).join('\n\n'); + return { + name: char.name || '', + description: (char.description || '').slice(0, 500), + personality: (char.personality || '').slice(0, 300), + }; +} + +function getPersonaContext() { + const context = getContext(); + + if (typeof window !== 'undefined' && window.power_user?.persona_description) { + return String(window.power_user.persona_description).slice(0, 500); + } + + if (context.persona_description) { + return String(context.persona_description).slice(0, 500); + } + + return ''; +} + +function getRecentEvents(count = 8) { + const store = getSummaryStore(); + const events = store?.json?.events || []; + + return events + .slice(-count) + .map(e => { + const time = e.timeLabel || ''; + const title = e.title || ''; + const participants = (e.participants || []).join('/'); + const summary = (e.summary || '').replace(/\s*\(#\d+(?:-\d+)?\)\s*$/, '').slice(0, 80); + + return time + ? `[${time}] ${title || participants}: ${summary}` + : `${title || participants}: ${summary}`; + }); +} + +function getRelevantArcs(focusHint = []) { + const store = getSummaryStore(); + const arcs = store?.json?.arcs || []; + + if (!arcs.length) return []; + + const hintSet = new Set(focusHint.map(s => String(s).toLowerCase())); + + const sorted = [...arcs].sort((a, b) => { + const aHit = hintSet.has(String(a.name || '').toLowerCase()) ? 1 : 0; + const bHit = hintSet.has(String(b.name || '').toLowerCase()) ? 1 : 0; + return bHit - aHit; + }); + + return sorted.slice(0, 4).map(a => { + const progress = Math.round((a.progress || 0) * 100); + return `${a.name}: ${a.trajectory || '未知状态'} (${progress}%)`; + }); +} + +function extractNamesFromMessages(messages) { + const names = new Set(); + + for (const m of messages) { + if (m.name) names.add(m.name); + } + + const text = messages.map(m => m.mes || '').join(' '); + const namePattern = /[\u4e00-\u9fff]{2,4}/g; + const matches = text.match(namePattern) || []; + + const freq = {}; + for (const name of matches) { + freq[name] = (freq[name] || 0) + 1; + } + + Object.entries(freq) + .filter(([, count]) => count >= 2) + .forEach(([name]) => names.add(name)); + + return Array.from(names).slice(0, 6); +} + +// ============================================================================ +// 主函数 +// ============================================================================ + +export async function expandQuery(messages, options = {}) { + const { pendingUserMessage = null, timeout = 6000 } = options; + + if (!messages?.length && !pendingUserMessage) { + return { focus: [], fn: [], pt: [], kw: [], queries: [] }; + } const T0 = performance.now(); + const character = getCharacterContext(); + const persona = getPersonaContext(); + const nameHints = extractNamesFromMessages(messages || []); + const recentEvents = getRecentEvents(8); + const arcs = getRelevantArcs(nameHints); + + const dialogueParts = []; + + for (const m of (messages || [])) { + const speaker = m.is_user ? '用户' : (m.name || '角色'); + const text = filterText(m.mes || '').trim(); + if (text) { + dialogueParts.push(`【${speaker}】\n${text.slice(0, 400)}`); + } + } + + if (pendingUserMessage) { + dialogueParts.push(`【用户(刚输入)】\n${filterText(pendingUserMessage).slice(0, 400)}`); + } + + const inputParts = []; + + if (character.name) { + inputParts.push(`## 当前角色\n${character.name}: ${character.description || character.personality || '无描述'}`); + } + + if (persona) { + inputParts.push(`## 用户人设\n${persona}`); + } + + if (recentEvents.length) { + inputParts.push(`## 近期剧情\n${recentEvents.map((e, i) => `${i + 1}. ${e}`).join('\n')}`); + } + + if (arcs.length) { + inputParts.push(`## 角色状态\n${arcs.join('\n')}`); + } + + inputParts.push(`## 最近对话\n${dialogueParts.join('\n\n')}`); + + const input = inputParts.join('\n\n'); + try { const response = await callLLM([ { role: 'system', content: SYSTEM_PROMPT }, { role: 'user', content: input }, ], { temperature: 0.15, - max_tokens: 250, + max_tokens: 500, timeout, sessionId: SESSION_ID, }); @@ -54,49 +230,104 @@ export async function expandQuery(messages, options = {}) { const parsed = parseJson(response); if (!parsed) { xbLog.warn(MODULE_ID, 'JSON解析失败', response?.slice(0, 200)); - return { entities: [], implicit: [], queries: [] }; + return { focus: [], fn: [], pt: [], kw: [], queries: [] }; } const result = { - entities: Array.isArray(parsed.e) ? parsed.e.slice(0, 5) : [], - implicit: Array.isArray(parsed.i) ? parsed.i.slice(0, 6) : [], - queries: Array.isArray(parsed.q) ? parsed.q.slice(0, 4) : [], + focus: Array.isArray(parsed.focus) ? parsed.focus.slice(0, 5) : [], + fn: Array.isArray(parsed.fn) ? parsed.fn.slice(0, 4) : [], + pt: Array.isArray(parsed.pt) ? parsed.pt.slice(0, 4) : [], + kw: Array.isArray(parsed.kw) ? parsed.kw.slice(0, 5) : [], + queries: Array.isArray(parsed.queries) ? parsed.queries.slice(0, 5) : [], }; - xbLog.info(MODULE_ID, `完成 (${Math.round(performance.now() - T0)}ms) e=${result.entities.length} i=${result.implicit.length} q=${result.queries.length}`); + xbLog.info(MODULE_ID, `完成 (${Math.round(performance.now() - T0)}ms) focus=[${result.focus.join(',')}] fn=[${result.fn.join(',')}]`); return result; } catch (e) { xbLog.error(MODULE_ID, '调用失败', e); - return { entities: [], implicit: [], queries: [] }; + return { focus: [], fn: [], pt: [], kw: [], queries: [] }; } } +// ============================================================================ // 缓存 +// ============================================================================ + const cache = new Map(); const CACHE_TTL = 300000; -function hashMessages(messages) { - const text = messages.slice(-2).map(m => (m.mes || '').slice(0, 100)).join('|'); +function hashMessages(messages, pending = '') { + const text = (messages || []) + .slice(-3) + .map(m => (m.mes || '').slice(0, 100)) + .join('|') + '|' + (pending || '').slice(0, 100); + let h = 0; - for (let i = 0; i < text.length; i++) h = ((h << 5) - h + text.charCodeAt(i)) | 0; + for (let i = 0; i < text.length; i++) { + h = ((h << 5) - h + text.charCodeAt(i)) | 0; + } return h.toString(36); } export async function expandQueryCached(messages, options = {}) { - const key = hashMessages(messages); + const key = hashMessages(messages, options.pendingUserMessage); const cached = cache.get(key); - if (cached && Date.now() - cached.time < CACHE_TTL) return cached.result; + + if (cached && Date.now() - cached.time < CACHE_TTL) { + return cached.result; + } const result = await expandQuery(messages, options); - if (result.entities.length || result.queries.length) { - if (cache.size > 50) cache.delete(cache.keys().next().value); + + if (result.focus.length || result.queries.length) { + if (cache.size > 50) { + cache.delete(cache.keys().next().value); + } cache.set(key, { result, time: Date.now() }); } + return result; } +// ============================================================================ +// 辅助函数:构建检索文本 +// ============================================================================ + +/** + * 将 expansion 结果转换为检索文本 + * 三层 themes 自然拼入,让向量自动编码 + */ export function buildSearchText(expansion) { - return [...(expansion.entities || []), ...(expansion.implicit || []), ...(expansion.queries || [])] - .filter(Boolean).join(' '); + const parts = []; + + // focus 人物 + if (expansion.focus?.length) { + parts.push(expansion.focus.join(' ')); + } + + // fn + pt + kw 合并为标签 + const tags = [ + ...(expansion.fn || []), + ...(expansion.pt || []), + ...(expansion.kw || []), + ].filter(Boolean); + + if (tags.length) { + parts.push(`[${tags.join('/')}]`); + } + + // queries + if (expansion.queries?.length) { + parts.push(...expansion.queries); + } + + return parts.filter(Boolean).join(' ').slice(0, 1500); +} + +/** + * 提取实体列表(兼容旧接口) + */ +export function getEntitiesFromExpansion(expansion) { + return expansion?.focus || []; } diff --git a/modules/story-summary/vector/llm/reranker.js b/modules/story-summary/vector/llm/reranker.js new file mode 100644 index 0000000..e070013 --- /dev/null +++ b/modules/story-summary/vector/llm/reranker.js @@ -0,0 +1,184 @@ +// ═══════════════════════════════════════════════════════════════════════════ +// Reranker - 硅基 bge-reranker-v2-m3 +// 对候选文档进行精排,过滤与 query 不相关的内容 +// ═══════════════════════════════════════════════════════════════════════════ + +import { xbLog } from '../../../../core/debug-core.js'; +import { getApiKey } from './siliconflow.js'; + +const MODULE_ID = 'reranker'; +const RERANK_URL = 'https://api.siliconflow.cn/v1/rerank'; +const RERANK_MODEL = 'BAAI/bge-reranker-v2-m3'; +const DEFAULT_TIMEOUT = 15000; +const MAX_DOCUMENTS = 100; // API 限制 + +/** + * 对文档列表进行 Rerank 精排 + * + * @param {string} query - 查询文本 + * @param {Array} documents - 文档文本列表 + * @param {object} options - 选项 + * @param {number} options.topN - 返回前 N 个结果,默认 40 + * @param {number} options.timeout - 超时时间,默认 15000ms + * @param {AbortSignal} options.signal - 取消信号 + * @returns {Promise>} 排序后的结果 + */ +export async function rerank(query, documents, options = {}) { + const { topN = 40, timeout = DEFAULT_TIMEOUT, signal } = options; + + if (!query?.trim()) { + xbLog.warn(MODULE_ID, 'query 为空,跳过 rerank'); + return documents.map((_, i) => ({ index: i, relevance_score: 0.5 })); + } + + if (!documents?.length) { + return []; + } + + const key = getApiKey(); + if (!key) { + xbLog.warn(MODULE_ID, '未配置 API Key,跳过 rerank'); + return documents.map((_, i) => ({ index: i, relevance_score: 0.5 })); + } + + // 截断超长文档列表 + const truncatedDocs = documents.slice(0, MAX_DOCUMENTS); + if (documents.length > MAX_DOCUMENTS) { + xbLog.warn(MODULE_ID, `文档数 ${documents.length} 超过限制 ${MAX_DOCUMENTS},已截断`); + } + + // 过滤空文档,记录原始索引 + const validDocs = []; + const indexMap = []; // validDocs index → original index + + for (let i = 0; i < truncatedDocs.length; i++) { + const text = String(truncatedDocs[i] || '').trim(); + if (text) { + validDocs.push(text); + indexMap.push(i); + } + } + + if (!validDocs.length) { + xbLog.warn(MODULE_ID, '无有效文档,跳过 rerank'); + return []; + } + + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), timeout); + + try { + const T0 = performance.now(); + + const response = await fetch(RERANK_URL, { + method: 'POST', + headers: { + 'Authorization': `Bearer ${key}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model: RERANK_MODEL, + query: query.slice(0, 1000), // 限制 query 长度 + documents: validDocs, + top_n: Math.min(topN, validDocs.length), + return_documents: false, + }), + signal: signal || controller.signal, + }); + + clearTimeout(timeoutId); + + if (!response.ok) { + const errorText = await response.text().catch(() => ''); + throw new Error(`Rerank API ${response.status}: ${errorText.slice(0, 200)}`); + } + + const data = await response.json(); + const results = data.results || []; + + // 映射回原始索引 + const mapped = results.map(r => ({ + index: indexMap[r.index], + relevance_score: r.relevance_score ?? 0, + })); + + const elapsed = Math.round(performance.now() - T0); + xbLog.info(MODULE_ID, `Rerank 完成: ${validDocs.length} docs → ${results.length} selected (${elapsed}ms)`); + + return mapped; + + } catch (e) { + clearTimeout(timeoutId); + + if (e?.name === 'AbortError') { + xbLog.warn(MODULE_ID, 'Rerank 超时或取消'); + } else { + xbLog.error(MODULE_ID, 'Rerank 失败', e); + } + + // 降级:返回原顺序,分数均匀分布 + return documents.slice(0, topN).map((_, i) => ({ + index: i, + relevance_score: 1 - (i / documents.length) * 0.5, + })); + } +} + +/** + * 对 chunk 对象列表进行 Rerank + * + * @param {string} query - 查询文本 + * @param {Array} chunks - chunk 对象列表,需要有 text 字段 + * @param {object} options - 选项 + * @returns {Promise>} 排序后的 chunk 列表,带 _rerankScore 字段 + */ +export async function rerankChunks(query, chunks, options = {}) { + const { topN = 40, minScore = 0.1 } = options; + + if (!chunks?.length) return []; + if (chunks.length <= topN) { + // 数量不超限,仍然 rerank 以获取分数,但不过滤 + const texts = chunks.map(c => c.text || c.semantic || ''); + const results = await rerank(query, texts, { topN: chunks.length, ...options }); + + const scoreMap = new Map(results.map(r => [r.index, r.relevance_score])); + return chunks.map((c, i) => ({ + ...c, + _rerankScore: scoreMap.get(i) ?? 0.5, + })).sort((a, b) => b._rerankScore - a._rerankScore); + } + + const texts = chunks.map(c => c.text || c.semantic || ''); + const results = await rerank(query, texts, { topN, ...options }); + + // 过滤低分 + 排序 + const selected = results + .filter(r => r.relevance_score >= minScore) + .sort((a, b) => b.relevance_score - a.relevance_score) + .map(r => ({ + ...chunks[r.index], + _rerankScore: r.relevance_score, + })); + + return selected; +} + +/** + * 测试 Rerank 服务连接 + */ +export async function testRerankService() { + const key = getApiKey(); + if (!key) { + throw new Error('请配置硅基 API Key'); + } + + try { + const results = await rerank('测试查询', ['测试文档1', '测试文档2'], { topN: 2 }); + return { + success: true, + message: `连接成功,返回 ${results.length} 个结果`, + }; + } catch (e) { + throw new Error(`连接失败: ${e.message}`); + } +} diff --git a/modules/story-summary/vector/pipeline/state-integration.js b/modules/story-summary/vector/pipeline/state-integration.js index 5e7a132..012fe24 100644 --- a/modules/story-summary/vector/pipeline/state-integration.js +++ b/modules/story-summary/vector/pipeline/state-integration.js @@ -1,9 +1,11 @@ -// ============================================================================ -// state-integration.js - L0 记忆锚点管理 -// 支持增量提取、清空、取消 +// ============================================================================ +// state-integration.js - L0 状态层集成 +// Phase 1: 批量 LLM 提取(只存文本) +// Phase 2: 统一向量化(提取完成后) // ============================================================================ import { getContext } from '../../../../../../../extensions.js'; +import { saveMetadataDebounced } from '../../../../../../../extensions.js'; import { xbLog } from '../../../../core/debug-core.js'; import { saveStateAtoms, @@ -26,9 +28,15 @@ import { filterText } from '../utils/text-filter.js'; const MODULE_ID = 'state-integration'; +// ★ 并发配置 +const CONCURRENCY = 30; +const STAGGER_DELAY = 30; + let initialized = false; +let extractionCancelled = false; export function cancelL0Extraction() { + extractionCancelled = true; cancelBatchExtraction(); } @@ -53,6 +61,7 @@ export async function getAnchorStats() { return { extracted: 0, total: 0, pending: 0, empty: 0, fail: 0 }; } + // 统计 AI 楼层 const aiFloors = []; for (let i = 0; i < chat.length; i++) { if (!chat[i]?.is_user) aiFloors.push(i); @@ -71,14 +80,20 @@ export async function getAnchorStats() { } const total = aiFloors.length; - const completed = ok + empty; - const pending = Math.max(0, total - completed); + const processed = ok + empty + fail; + const pending = Math.max(0, total - processed); - return { extracted: completed, total, pending, empty, fail }; + return { + extracted: ok + empty, + total, + pending, + empty, + fail + }; } // ============================================================================ -// 增量提取 +// 增量提取 - Phase 1 提取文本,Phase 2 统一向量化 // ============================================================================ function buildL0InputText(userMessage, aiMessage) { @@ -102,6 +117,9 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress) { const vectorCfg = getVectorConfig(); if (!vectorCfg?.enabled) return { built: 0 }; + // ★ 重置取消标志 + extractionCancelled = false; + const pendingPairs = []; for (let i = 0; i < chat.length; i++) { @@ -109,6 +127,7 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress) { if (!msg || msg.is_user) continue; const st = getL0FloorStatus(i); + // ★ 只跳过 ok 和 empty,fail 的可以重试 if (st?.status === 'ok' || st?.status === 'empty') { continue; } @@ -125,54 +144,109 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress) { } if (!pendingPairs.length) { - onProgress?.(0, 0, '已全部提取'); + onProgress?.('已全部提取', 0, 0); return { built: 0 }; } - xbLog.info(MODULE_ID, `增量 L0 提取:pending=${pendingPairs.length}`); + xbLog.info(MODULE_ID, `增量 L0 提取:pending=${pendingPairs.length}, concurrency=${CONCURRENCY}`); let completed = 0; + let failed = 0; const total = pendingPairs.length; let builtAtoms = 0; - for (const pair of pendingPairs) { - const floor = pair.aiFloor; - const prev = getL0FloorStatus(floor); + // ★ Phase 1: 收集所有新提取的 atoms(不向量化) + const allNewAtoms = []; - try { - const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 }); + // ★ 30 并发批次处理 + for (let i = 0; i < pendingPairs.length; i += CONCURRENCY) { + // ★ 检查取消 + if (extractionCancelled) { + xbLog.info(MODULE_ID, `用户取消,已完成 ${completed}/${total}`); + break; + } - if (atoms == null) { - throw new Error('llm_failed'); + const batch = pendingPairs.slice(i, i + CONCURRENCY); + + const promises = batch.map((pair, idx) => (async () => { + // 首批错开启动,避免瞬间打满 + if (i === 0) { + await new Promise(r => setTimeout(r, idx * STAGGER_DELAY)); } - if (!atoms.length) { - setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 }); - } else { - atoms.forEach(a => a.chatId = chatId); - saveStateAtoms(atoms); - await vectorizeAtoms(chatId, atoms); + // 再次检查取消 + if (extractionCancelled) return; - setL0FloorStatus(floor, { status: 'ok', atoms: atoms.length }); - builtAtoms += atoms.length; + const floor = pair.aiFloor; + const prev = getL0FloorStatus(floor); + + try { + const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 }); + + if (extractionCancelled) return; + + if (atoms == null) { + throw new Error('llm_failed'); + } + + if (!atoms.length) { + setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 }); + } else { + atoms.forEach(a => a.chatId = chatId); + saveStateAtoms(atoms); + // ★ Phase 1: 只收集,不向量化 + allNewAtoms.push(...atoms); + + setL0FloorStatus(floor, { status: 'ok', atoms: atoms.length }); + builtAtoms += atoms.length; + } + } catch (e) { + if (extractionCancelled) return; + + setL0FloorStatus(floor, { + status: 'fail', + attempts: (prev?.attempts || 0) + 1, + reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120), + }); + failed++; + } finally { + if (!extractionCancelled) { + completed++; + onProgress?.(`提取: ${completed}/${total}`, completed, total); + } } - } catch (e) { - setL0FloorStatus(floor, { - status: 'fail', - attempts: (prev?.attempts || 0) + 1, - reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120), - }); - } finally { - completed++; - onProgress?.(`L0: ${completed}/${total}`, completed, total); + })()); + + await Promise.all(promises); + + // 批次间短暂间隔 + if (i + CONCURRENCY < pendingPairs.length && !extractionCancelled) { + await new Promise(r => setTimeout(r, 30)); } } - xbLog.info(MODULE_ID, `增量 L0 完成:atoms=${builtAtoms}, floors=${pendingPairs.length}`); + // ★ 立即保存文本,不要等防抖 + try { + saveMetadataDebounced?.(); + } catch { } + + // ★ Phase 2: 统一向量化所有新提取的 atoms + if (allNewAtoms.length > 0 && !extractionCancelled) { + onProgress?.(`向量化 L0: 0/${allNewAtoms.length}`, 0, allNewAtoms.length); + await vectorizeAtoms(chatId, allNewAtoms, (current, total) => { + onProgress?.(`向量化 L0: ${current}/${total}`, current, total); + }); + } + + xbLog.info(MODULE_ID, `L0 ${extractionCancelled ? '已取消' : '完成'}:atoms=${builtAtoms}, completed=${completed}/${total}, failed=${failed}`); return { built: builtAtoms }; } -async function vectorizeAtoms(chatId, atoms) { +// ============================================================================ +// 向量化(支持进度回调) +// ============================================================================ + +async function vectorizeAtoms(chatId, atoms, onProgress) { if (!atoms?.length) return; const vectorCfg = getVectorConfig(); @@ -180,14 +254,27 @@ async function vectorizeAtoms(chatId, atoms) { const texts = atoms.map(a => a.semantic); const fingerprint = getEngineFingerprint(vectorCfg); + const batchSize = 20; try { - const vectors = await embed(texts, { timeout: 30000 }); + const allVectors = []; - const items = atoms.map((a, i) => ({ + for (let i = 0; i < texts.length; i += batchSize) { + if (extractionCancelled) break; + + const batch = texts.slice(i, i + batchSize); + const vectors = await embed(batch, { timeout: 30000 }); + allVectors.push(...vectors); + + onProgress?.(allVectors.length, texts.length); + } + + if (extractionCancelled) return; + + const items = atoms.slice(0, allVectors.length).map((a, i) => ({ atomId: a.atomId, floor: a.floor, - vector: vectors[i], + vector: allVectors[i], })); await saveStateVectors(chatId, items, fingerprint); @@ -207,11 +294,17 @@ export async function clearAllAtomsAndVectors(chatId) { if (chatId) { await clearStateVectors(chatId); } + + // ★ 立即保存 + try { + saveMetadataDebounced?.(); + } catch { } + xbLog.info(MODULE_ID, '已清空所有记忆锚点'); } // ============================================================================ -// 实时增量(AI 消息后触发)- 保留原有逻辑 +// 实时增量(AI 消息后触发)- 保持不变 // ============================================================================ let extractionQueue = []; @@ -245,7 +338,9 @@ async function processQueue() { atoms.forEach(a => a.chatId = chatId); saveStateAtoms(atoms); - await vectorizeAtoms(chatId, atoms); + + // 单楼实时处理:立即向量化 + await vectorizeAtomsSimple(chatId, atoms); xbLog.info(MODULE_ID, `floor ${aiFloor}: ${atoms.length} atoms 已存储`); } catch (e) { @@ -256,6 +351,31 @@ async function processQueue() { isProcessing = false; } +// 简单向量化(无进度回调,用于单楼实时处理) +async function vectorizeAtomsSimple(chatId, atoms) { + if (!atoms?.length) return; + + const vectorCfg = getVectorConfig(); + if (!vectorCfg?.enabled) return; + + const texts = atoms.map(a => a.semantic); + const fingerprint = getEngineFingerprint(vectorCfg); + + try { + const vectors = await embed(texts, { timeout: 30000 }); + + const items = atoms.map((a, i) => ({ + atomId: a.atomId, + floor: a.floor, + vector: vectors[i], + })); + + await saveStateVectors(chatId, items, fingerprint); + } catch (e) { + xbLog.error(MODULE_ID, 'L0 向量化失败', e); + } +} + // ============================================================================ // 回滚钩子 // ============================================================================ @@ -301,7 +421,7 @@ export async function rebuildStateVectors(chatId, vectorCfg) { xbLog.info(MODULE_ID, `重建 L0 向量: ${atoms.length} 条 atom`); await clearStateVectors(chatId); - await vectorizeAtoms(chatId, atoms); + await vectorizeAtomsSimple(chatId, atoms); return { built: atoms.length }; } diff --git a/modules/story-summary/vector/pipeline/state-recall.js b/modules/story-summary/vector/pipeline/state-recall.js index af1869b..eb67183 100644 --- a/modules/story-summary/vector/pipeline/state-recall.js +++ b/modules/story-summary/vector/pipeline/state-recall.js @@ -131,16 +131,44 @@ export function stateToVirtualChunks(l0Results) { // ═══════════════════════════════════════════════════════════════════════════ /** - * 合并 L0 和 L1 chunks,每楼层最多保留 limit 条 - * @param {Array} l0Chunks - 虚拟 chunks(已按相似度排序) - * @param {Array} l1Chunks - 真实 chunks(已按相似度排序) + * 合并 L0 和 L1 chunks + * @param {Array} l0Chunks - L0 虚拟 chunks(带 similarity) + * @param {Array} l1Chunks - L1 真实 chunks(无 similarity) * @param {number} limit - 每楼层上限 * @returns {Array} 合并后的 chunks */ export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) { + // 构建 L0 楼层 → 最高 similarity 映射 + const floorSimilarity = new Map(); + for (const c of (l0Chunks || [])) { + const existing = floorSimilarity.get(c.floor) || 0; + if ((c.similarity || 0) > existing) { + floorSimilarity.set(c.floor, c.similarity || 0); + } + } + + // L1 继承所属楼层的 L0 similarity + const l1WithScore = (l1Chunks || []).map(c => ({ + ...c, + similarity: floorSimilarity.get(c.floor) || 0.5, + })); + // 合并并按相似度排序 - const all = [...(l0Chunks || []), ...(l1Chunks || [])] - .sort((a, b) => b.similarity - a.similarity); + const all = [...(l0Chunks || []), ...l1WithScore] + .sort((a, b) => { + // 相似度优先 + const simDiff = (b.similarity || 0) - (a.similarity || 0); + if (Math.abs(simDiff) > 0.01) return simDiff; + + // 同楼层:L0 优先于 L1 + if (a.floor === b.floor) { + if (a.isL0 && !b.isL0) return -1; + if (!a.isL0 && b.isL0) return 1; + } + + // 按楼层升序 + return a.floor - b.floor; + }); // 每楼层稀疏去重 const byFloor = new Map(); @@ -153,8 +181,9 @@ export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) { } } - // 扁平化并保持相似度排序 + // 扁平化并保持排序 return Array.from(byFloor.values()) .flat() - .sort((a, b) => b.similarity - a.similarity); + .sort((a, b) => (b.similarity || 0) - (a.similarity || 0)); } + diff --git a/modules/story-summary/vector/retrieval/metrics.js b/modules/story-summary/vector/retrieval/metrics.js new file mode 100644 index 0000000..185e3c6 --- /dev/null +++ b/modules/story-summary/vector/retrieval/metrics.js @@ -0,0 +1,388 @@ +// ═══════════════════════════════════════════════════════════════════════════ +// Story Summary - Metrics Collector +// 召回质量指标收集与格式化 +// ═══════════════════════════════════════════════════════════════════════════ + +/** + * 创建空的指标对象 + */ +export function createMetrics() { + return { + // L0 Query Understanding + l0: { + needRecall: false, + intent: '', + focusEntities: [], + queries: [], + implicitTopics: [], + queryExpansionTime: 0, + atomsMatched: 0, + floorsHit: 0, + topAtoms: [], + }, + + // L1 Constraints (Facts) + l1: { + factsTotal: 0, + factsInjected: 0, + factsFiltered: 0, + tokens: 0, + samples: [], + }, + + // L2 Narrative Retrieval + l2: { + eventsInStore: 0, + eventsConsidered: 0, + eventsSelected: 0, + byRecallType: { direct: 0, causal: 0, context: 0 }, + similarityDistribution: { min: 0, max: 0, mean: 0, median: 0 }, + entityFilterStats: null, + causalChainDepth: 0, + causalEventsCount: 0, + entitiesLoaded: 0, + entityNames: [], + retrievalTime: 0, + }, + + // L3 Evidence Assembly + l3: { + floorsFromL0: 0, + // 候选规模(rerank 前) + chunksInRange: 0, + chunksInRangeByType: { l0Virtual: 0, l1Real: 0 }, + // 最终注入(rerank + sparse 后) + chunksSelected: 0, + chunksSelectedByType: { l0Virtual: 0, l1Real: 0 }, + // 上下文配对 + contextPairsAdded: 0, + tokens: 0, + assemblyTime: 0, + // Rerank 相关 + rerankApplied: false, + beforeRerank: 0, + afterRerank: 0, + rerankTime: 0, + rerankScoreDistribution: null, + }, + + // L4 Formatting + l4: { + sectionsIncluded: [], + formattingTime: 0, + }, + + // Budget Summary + budget: { + total: 0, + limit: 0, + utilization: 0, + breakdown: { + constraints: 0, + events: 0, + entities: 0, + chunks: 0, + recentOrphans: 0, + arcs: 0, + }, + }, + + // Total Timing + timing: { + queryExpansion: 0, + l0Search: 0, + l1Constraints: 0, + l2Retrieval: 0, + l3Retrieval: 0, + l3Rerank: 0, + l3Assembly: 0, + l4Formatting: 0, + total: 0, + }, + + // Quality Indicators + quality: { + constraintCoverage: 100, + eventPrecisionProxy: 0, + evidenceDensity: 0, + potentialIssues: [], + }, + }; +} + +/** + * 计算相似度分布统计 + */ +export function calcSimilarityStats(similarities) { + if (!similarities?.length) { + return { min: 0, max: 0, mean: 0, median: 0 }; + } + + const sorted = [...similarities].sort((a, b) => a - b); + const sum = sorted.reduce((a, b) => a + b, 0); + + return { + min: Number(sorted[0].toFixed(3)), + max: Number(sorted[sorted.length - 1].toFixed(3)), + mean: Number((sum / sorted.length).toFixed(3)), + median: Number(sorted[Math.floor(sorted.length / 2)].toFixed(3)), + }; +} + +/** + * 格式化指标为可读日志 + */ +export function formatMetricsLog(metrics) { + const m = metrics; + const lines = []; + + lines.push(''); + lines.push('═══════════════════════════════════════════════════════════════════'); + lines.push(' Recall Metrics Report '); + lines.push('═══════════════════════════════════════════════════════════════════'); + lines.push(''); + + // L0 Query Understanding + lines.push('[L0] Query Understanding'); + lines.push(`├─ need_recall: ${m.l0.needRecall}`); + if (m.l0.needRecall) { + lines.push(`├─ intent: ${m.l0.intent || 'mixed'}`); + lines.push(`├─ focus_entities: [${(m.l0.focusEntities || []).join(', ')}]`); + lines.push(`├─ queries: [${(m.l0.queries || []).slice(0, 3).join(', ')}]`); + lines.push(`├─ query_expansion_time: ${m.l0.queryExpansionTime}ms`); + lines.push(`├─ atoms_matched: ${m.l0.atomsMatched || 0}`); + lines.push(`└─ floors_hit: ${m.l0.floorsHit || 0}`); + } + lines.push(''); + + // L1 Constraints + lines.push('[L1] Constraints (Facts)'); + lines.push(`├─ facts_total: ${m.l1.factsTotal}`); + lines.push(`├─ facts_filtered: ${m.l1.factsFiltered || 0}`); + lines.push(`├─ facts_injected: ${m.l1.factsInjected}`); + lines.push(`├─ tokens: ${m.l1.tokens}`); + if (m.l1.samples && m.l1.samples.length > 0) { + lines.push(`└─ samples: "${m.l1.samples.slice(0, 2).join('", "')}"`); + } + lines.push(''); + + // L2 Narrative Retrieval + lines.push('[L2] Narrative Retrieval'); + lines.push(`├─ events_in_store: ${m.l2.eventsInStore}`); + lines.push(`├─ events_considered: ${m.l2.eventsConsidered}`); + + if (m.l2.entityFilterStats) { + const ef = m.l2.entityFilterStats; + lines.push(`├─ entity_filter:`); + lines.push(`│ ├─ focus_entities: [${(ef.focusEntities || []).join(', ')}]`); + lines.push(`│ ├─ before_filter: ${ef.before}`); + lines.push(`│ ├─ after_filter: ${ef.after}`); + lines.push(`│ └─ filtered_out: ${ef.filtered}`); + } + + lines.push(`├─ events_selected: ${m.l2.eventsSelected}`); + lines.push(`├─ by_recall_type:`); + lines.push(`│ ├─ direct: ${m.l2.byRecallType.direct}`); + lines.push(`│ ├─ causal: ${m.l2.byRecallType.causal}`); + lines.push(`│ └─ context: ${m.l2.byRecallType.context}`); + + const sim = m.l2.similarityDistribution; + if (sim && sim.max > 0) { + lines.push(`├─ similarity_distribution:`); + lines.push(`│ ├─ min: ${sim.min}`); + lines.push(`│ ├─ max: ${sim.max}`); + lines.push(`│ ├─ mean: ${sim.mean}`); + lines.push(`│ └─ median: ${sim.median}`); + } + + lines.push(`├─ causal_chain: depth=${m.l2.causalChainDepth}, events=${m.l2.causalEventsCount}`); + lines.push(`├─ entities_loaded: ${m.l2.entitiesLoaded} [${(m.l2.entityNames || []).join(', ')}]`); + lines.push(`└─ retrieval_time: ${m.l2.retrievalTime}ms`); + lines.push(''); + + // L3 Evidence Assembly + lines.push('[L3] Evidence Assembly'); + lines.push(`├─ floors_from_l0: ${m.l3.floorsFromL0}`); + + // 候选规模 + lines.push(`├─ chunks_in_range: ${m.l3.chunksInRange}`); + if (m.l3.chunksInRangeByType) { + const cir = m.l3.chunksInRangeByType; + lines.push(`│ ├─ l0_virtual: ${cir.l0Virtual || 0}`); + lines.push(`│ └─ l1_real: ${cir.l1Real || 0}`); + } + + // Rerank 信息 + if (m.l3.rerankApplied) { + lines.push(`├─ rerank_applied: true`); + lines.push(`│ ├─ before: ${m.l3.beforeRerank}`); + lines.push(`│ ├─ after: ${m.l3.afterRerank}`); + lines.push(`│ └─ time: ${m.l3.rerankTime}ms`); + if (m.l3.rerankScoreDistribution) { + const rd = m.l3.rerankScoreDistribution; + lines.push(`├─ rerank_scores: min=${rd.min}, max=${rd.max}, mean=${rd.mean}`); + } + } else { + lines.push(`├─ rerank_applied: false`); + } + + // 最终注入规模 + lines.push(`├─ chunks_selected: ${m.l3.chunksSelected}`); + if (m.l3.chunksSelectedByType) { + const cs = m.l3.chunksSelectedByType; + lines.push(`│ ├─ l0_virtual: ${cs.l0Virtual || 0}`); + lines.push(`│ └─ l1_real: ${cs.l1Real || 0}`); + } + + lines.push(`├─ context_pairs_added: ${m.l3.contextPairsAdded}`); + lines.push(`├─ tokens: ${m.l3.tokens}`); + lines.push(`└─ assembly_time: ${m.l3.assemblyTime}ms`); + lines.push(''); + + // L4 Formatting + lines.push('[L4] Prompt Formatting'); + lines.push(`├─ sections: [${(m.l4.sectionsIncluded || []).join(', ')}]`); + lines.push(`└─ formatting_time: ${m.l4.formattingTime}ms`); + lines.push(''); + + // Budget Summary + lines.push('[Budget Summary]'); + lines.push(`├─ total_tokens: ${m.budget.total}`); + lines.push(`├─ budget_limit: ${m.budget.limit}`); + lines.push(`├─ utilization: ${m.budget.utilization}%`); + lines.push(`└─ breakdown:`); + const bd = m.budget.breakdown || {}; + lines.push(` ├─ constraints (L1): ${bd.constraints || 0}`); + lines.push(` ├─ events (L2): ${bd.events || 0}`); + lines.push(` ├─ chunks (L3): ${bd.chunks || 0}`); + lines.push(` ├─ recent_orphans: ${bd.recentOrphans || 0}`); + lines.push(` └─ arcs: ${bd.arcs || 0}`); + lines.push(''); + + // Timing + lines.push('[Timing]'); + lines.push(`├─ query_expansion: ${m.timing.queryExpansion}ms`); + lines.push(`├─ l0_search: ${m.timing.l0Search}ms`); + lines.push(`├─ l1_constraints: ${m.timing.l1Constraints}ms`); + lines.push(`├─ l2_retrieval: ${m.timing.l2Retrieval}ms`); + lines.push(`├─ l3_retrieval: ${m.timing.l3Retrieval}ms`); + if (m.timing.l3Rerank > 0) { + lines.push(`├─ l3_rerank: ${m.timing.l3Rerank}ms`); + } + lines.push(`├─ l3_assembly: ${m.timing.l3Assembly}ms`); + lines.push(`├─ l4_formatting: ${m.timing.l4Formatting}ms`); + lines.push(`└─ total: ${m.timing.total}ms`); + lines.push(''); + + // Quality Indicators + lines.push('[Quality Indicators]'); + lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`); + lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`); + lines.push(`├─ evidence_density: ${m.quality.evidenceDensity}%`); + + if (m.quality.potentialIssues && m.quality.potentialIssues.length > 0) { + lines.push(`└─ potential_issues:`); + m.quality.potentialIssues.forEach((issue, i) => { + const prefix = i === m.quality.potentialIssues.length - 1 ? ' └─' : ' ├─'; + lines.push(`${prefix} ⚠ ${issue}`); + }); + } else { + lines.push(`└─ potential_issues: none`); + } + + lines.push(''); + lines.push('═══════════════════════════════════════════════════════════════════'); + lines.push(''); + + return lines.join('\n'); +} + +/** + * 检测潜在问题 + */ +export function detectIssues(metrics) { + const issues = []; + const m = metrics; + + // 召回比例问题 + if (m.l2.eventsConsidered > 0) { + const selectRatio = m.l2.eventsSelected / m.l2.eventsConsidered; + if (selectRatio < 0.1) { + issues.push(`Event selection ratio too low (${(selectRatio * 100).toFixed(1)}%) - threshold may be too high`); + } + if (selectRatio > 0.6 && m.l2.eventsConsidered > 10) { + issues.push(`Event selection ratio high (${(selectRatio * 100).toFixed(1)}%) - may include noise`); + } + } + + // 实体过滤问题 + if (m.l2.entityFilterStats) { + const ef = m.l2.entityFilterStats; + if (ef.filtered === 0 && ef.before > 10) { + issues.push(`No events filtered by entity - focus entities may be too broad or missing`); + } + if (ef.before > 0 && ef.filtered > ef.before * 0.8) { + issues.push(`Too many events filtered (${ef.filtered}/${ef.before}) - focus may be too narrow`); + } + } + + // 相似度问题 + if (m.l2.similarityDistribution && m.l2.similarityDistribution.min > 0 && m.l2.similarityDistribution.min < 0.5) { + issues.push(`Low similarity events included (min=${m.l2.similarityDistribution.min})`); + } + + // 因果链问题 + if (m.l2.eventsSelected > 0 && m.l2.causalEventsCount === 0 && m.l2.byRecallType.direct === 0) { + issues.push('No direct or causal events - query expansion may be inaccurate'); + } + + // L0 atoms 问题 + if ((m.l0.atomsMatched || 0) === 0) { + issues.push('L0 atoms not matched - may need to generate anchors'); + } + + // Rerank 相关问题 + if (m.l3.rerankApplied) { + if (m.l3.beforeRerank > 0 && m.l3.afterRerank > 0) { + const filterRatio = 1 - (m.l3.afterRerank / m.l3.beforeRerank); + if (filterRatio > 0.7) { + issues.push(`High rerank filter ratio (${(filterRatio * 100).toFixed(0)}%) - many irrelevant chunks removed`); + } + } + + if (m.l3.rerankScoreDistribution) { + const rd = m.l3.rerankScoreDistribution; + if (rd.max < 0.5) { + issues.push(`Low rerank scores (max=${rd.max}) - query may be poorly matched`); + } + if (rd.mean < 0.3) { + issues.push(`Very low average rerank score (mean=${rd.mean}) - context may be weak`); + } + } + + if (m.l3.rerankTime > 2000) { + issues.push(`Slow rerank (${m.l3.rerankTime}ms) - may affect response time`); + } + } + + // 证据密度问题(基于 selected 的构成) + if (m.l3.chunksSelected > 0 && m.l3.chunksSelectedByType) { + const l1Real = m.l3.chunksSelectedByType.l1Real || 0; + const density = l1Real / m.l3.chunksSelected; + if (density < 0.3 && m.l3.chunksSelected > 10) { + issues.push(`Low L1 chunk ratio in selected (${(density * 100).toFixed(0)}%) - may lack concrete evidence`); + } + } + + // 预算问题 + if (m.budget.utilization > 90) { + issues.push(`High budget utilization (${m.budget.utilization}%) - may be truncating content`); + } + + // 性能问题 + if (m.timing.total > 5000) { + issues.push(`Slow recall (${m.timing.total}ms) - consider optimization`); + } + + return issues; +} diff --git a/modules/story-summary/vector/retrieval/recall.js b/modules/story-summary/vector/retrieval/recall.js index 0b913f3..c5c094d 100644 --- a/modules/story-summary/vector/retrieval/recall.js +++ b/modules/story-summary/vector/retrieval/recall.js @@ -1,24 +1,21 @@ -// ═══════════════════════════════════════════════════════════════════════════ -// Story Summary - Recall Engine (v2 - LLM Augmented) -// 纯向量路召回,LLM Query Expansion 替代 BM25 +// ═══════════════════════════════════════════════════════════════════════════ +// Story Summary - Recall Engine (v3 - L0 作为 L3 索引 + Rerank 精排) +// +// 架构: +// - Query Expansion → L0(主索引)→ L3(按楼层拉取)→ Rerank(精排) +// - Query Expansion → L2(独立检索) +// - L0 和 L2 不在同一抽象层,分开处理 // ═══════════════════════════════════════════════════════════════════════════ -import { getAllChunkVectors, getAllEventVectors, getChunksByFloors, getMeta } from '../storage/chunk-store.js'; -import { getEngineFingerprint } from '../utils/embedder.js'; +import { getAllEventVectors, getChunksByFloors, getMeta } from '../storage/chunk-store.js'; +import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js'; +import { getEngineFingerprint, embed } from '../utils/embedder.js'; import { xbLog } from '../../../../core/debug-core.js'; import { getContext } from '../../../../../../../extensions.js'; -import { getSummaryStore } from '../../data/store.js'; import { filterText } from '../utils/text-filter.js'; -import { - searchStateAtoms, - buildL0FloorBonus, - stateToVirtualChunks, - mergeAndSparsify, -} from '../pipeline/state-recall.js'; - -// 新增:LLM 模块 import { expandQueryCached, buildSearchText } from '../llm/query-expansion.js'; -import { embed } from '../llm/siliconflow.js'; +import { rerankChunks } from '../llm/reranker.js'; +import { createMetrics, calcSimilarityStats } from './metrics.js'; const MODULE_ID = 'recall'; @@ -27,37 +24,31 @@ const MODULE_ID = 'recall'; // ═══════════════════════════════════════════════════════════════════════════ const CONFIG = { - // Query - QUERY_MSG_COUNT: 2, - QUERY_MAX_CHARS: 100, - QUERY_EXPANSION_TIMEOUT: 3000, + // Query Expansion + QUERY_EXPANSION_TIMEOUT: 6000, + + // L0 配置 + L0_MAX_RESULTS: 30, + L0_MIN_SIMILARITY: 0.50, + + // L2 配置 + L2_CANDIDATE_MAX: 100, + L2_SELECT_MAX: 50, + L2_MIN_SIMILARITY: 0.55, + L2_MMR_LAMBDA: 0.72, + + // L3 配置(从 L0 楼层拉取) + L3_MAX_CHUNKS_PER_FLOOR: 3, + L3_MAX_TOTAL_CHUNKS: 60, + + // Rerank 配置 + RERANK_TOP_N: 50, + RERANK_MIN_SCORE: 0.15, // 因果链 CAUSAL_CHAIN_MAX_DEPTH: 10, CAUSAL_INJECT_MAX: 30, - - // 候选数量 - CANDIDATE_CHUNKS: 150, - CANDIDATE_EVENTS: 100, - - // 最终输出 - MAX_CHUNKS: 40, - MAX_EVENTS: 80, - - // 相似度阈值 - MIN_SIMILARITY_CHUNK: 0.55, - MIN_SIMILARITY_CHUNK_RECENT: 0.45, - MIN_SIMILARITY_EVENT: 0.60, - - // MMR - MMR_LAMBDA: 0.72, - - // L0 加权 - L0_FLOOR_BONUS_FACTOR: 0.10, - FLOOR_MAX_CHUNKS: 2, }; - -// ═══════════════════════════════════════════════════════════════════════════ // 工具函数 // ═══════════════════════════════════════════════════════════════════════════ @@ -73,32 +64,92 @@ function cosineSimilarity(a, b) { } function normalize(s) { - return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim().toLowerCase(); -} - -function parseFloorRange(summary) { - if (!summary) return null; - const match = String(summary).match(/\(#(\d+)(?:-(\d+))?\)/); - if (!match) return null; - const start = Math.max(0, parseInt(match[1], 10) - 1); - const end = Math.max(0, (match[2] ? parseInt(match[2], 10) : parseInt(match[1], 10)) - 1); - return { start, end }; + return String(s || '') + .normalize('NFKC') + .replace(/[\u200B-\u200D\uFEFF]/g, '') + .trim() + .toLowerCase(); } function cleanForRecall(text) { return filterText(text).replace(/\[tts:[^\]]*\]/gi, '').trim(); } +/** + * 从 focusEntities 中移除用户名 + * @param {Array} focusEntities - 焦点实体 + * @param {string} userName - 用户名 + * @returns {Array} 过滤后的实体 + */ +function removeUserNameFromFocus(focusEntities, userName) { + const u = normalize(userName); + if (!u) return Array.isArray(focusEntities) ? focusEntities : []; + + return (focusEntities || []) + .map(e => String(e || '').trim()) + .filter(Boolean) + .filter(e => normalize(e) !== u); +} + +/** + * 构建用于 Rerank 的查询文本 + * 综合 Query Expansion 结果和最近对话 + * @param {object} expansion - Query Expansion 结果 + * @param {Array} lastMessages - 最近的消息 + * @param {string} pendingUserMessage - 待发送的用户消息 + * @returns {string} Rerank 用的查询文本 + */ +function buildRerankQuery(expansion, lastMessages, pendingUserMessage) { + const parts = []; + + // 1. focus entities + if (expansion?.focus?.length) { + parts.push(expansion.focus.join(' ')); + } + + // 2. DSL queries(取前3个) + if (expansion?.queries?.length) { + parts.push(...expansion.queries.slice(0, 3)); + } + + // 3. 最近对话的关键内容 + const recentTexts = (lastMessages || []) + .slice(-2) + .map(m => cleanForRecall(m.mes || '').slice(0, 150)) + .filter(Boolean); + + if (recentTexts.length) { + parts.push(...recentTexts); + } + + // 4. 待发送消息 + if (pendingUserMessage) { + parts.push(cleanForRecall(pendingUserMessage).slice(0, 200)); + } + + return parts.filter(Boolean).join('\n').slice(0, 1500); +} + // ═══════════════════════════════════════════════════════════════════════════ // MMR 选择 // ═══════════════════════════════════════════════════════════════════════════ +/** + * MMR 多样性选择 + * @param {Array} candidates - 候选项 + * @param {number} k - 选择数量 + * @param {number} lambda - MMR 参数 + * @param {Function} getVector - 获取向量函数 + * @param {Function} getScore - 获取分数函数 + * @returns {Array} 选中的项 + */ function mmrSelect(candidates, k, lambda, getVector, getScore) { const selected = []; const ids = new Set(); while (selected.length < k && candidates.length) { - let best = null, bestScore = -Infinity; + let best = null; + let bestScore = -Infinity; for (const c of candidates) { if (ids.has(c._id)) continue; @@ -131,10 +182,385 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) { return selected; } +// ═══════════════════════════════════════════════════════════════════════════ +// L0 检索:Query → L0 → 楼层集合 +// ═══════════════════════════════════════════════════════════════════════════ + +/** + * L0 向量检索 + * @param {Array} queryVector - 查询向量 + * @param {object} vectorConfig - 向量配置 + * @param {object} metrics - 指标对象 + * @returns {Promise} {atoms, floors} + */ +async function searchL0(queryVector, vectorConfig, metrics) { + const { chatId } = getContext(); + if (!chatId || !queryVector?.length) { + return { atoms: [], floors: new Set() }; + } + + // 检查 fingerprint + const meta = await getMeta(chatId); + const fp = getEngineFingerprint(vectorConfig); + if (meta.fingerprint && meta.fingerprint !== fp) { + xbLog.warn(MODULE_ID, 'L0 fingerprint 不匹配'); + return { atoms: [], floors: new Set() }; + } + + // 获取向量 + const stateVectors = await getAllStateVectors(chatId); + if (!stateVectors.length) { + return { atoms: [], floors: new Set() }; + } + + // 获取 atoms 元数据 + const atomsList = getStateAtoms(); + const atomMap = new Map(atomsList.map(a => [a.atomId, a])); + + // 计算相似度 + const scored = stateVectors + .map(sv => { + const atom = atomMap.get(sv.atomId); + if (!atom) return null; + + return { + atomId: sv.atomId, + floor: sv.floor, + similarity: cosineSimilarity(queryVector, sv.vector), + atom, + }; + }) + .filter(Boolean) + .filter(s => s.similarity >= CONFIG.L0_MIN_SIMILARITY) + .sort((a, b) => b.similarity - a.similarity) + .slice(0, CONFIG.L0_MAX_RESULTS); + + // 收集楼层 + const floors = new Set(scored.map(s => s.floor)); + + // 更新 metrics + if (metrics) { + metrics.l0.atomsMatched = scored.length; + metrics.l0.floorsHit = floors.size; + metrics.l0.topAtoms = scored.slice(0, 5).map(s => ({ + floor: s.floor, + semantic: s.atom?.semantic?.slice(0, 50), + similarity: Math.round(s.similarity * 1000) / 1000, + })); + } + + return { atoms: scored, floors }; +} + +// ═══════════════════════════════════════════════════════════════════════════ +// L3 拉取:L0 楼层 → Chunks(带 Rerank 精排) +// ═══════════════════════════════════════════════════════════════════════════ + +/** + * 按楼层稀疏去重 + * 每楼层最多保留 limit 个 chunk,优先保留分数高的 + * @param {Array} chunks - chunk 列表(假设已按分数排序) + * @param {number} limit - 每楼层上限 + * @returns {Array} 去重后的 chunks + */ +function sparseByFloor(chunks, limit = 3) { + const byFloor = new Map(); + + for (const c of chunks) { + const arr = byFloor.get(c.floor) || []; + if (arr.length < limit) { + arr.push(c); + byFloor.set(c.floor, arr); + } + } + + const result = []; + const seen = new Set(); + + for (const c of chunks) { + if (!seen.has(c.chunkId)) { + const arr = byFloor.get(c.floor); + if (arr?.includes(c)) { + result.push(c); + seen.add(c.chunkId); + } + } + } + + return result; +} + +/** + * 统计 chunks 的类型构成 + * @param {Array} chunks - chunk 列表 + * @returns {object} {l0Virtual, l1Real} + */ +function countChunksByType(chunks) { + let l0Virtual = 0; + let l1Real = 0; + + for (const c of chunks || []) { + if (c.isL0) { + l0Virtual++; + } else { + l1Real++; + } + } + + return { l0Virtual, l1Real }; +} + +/** + * 从 L0 命中楼层拉取 chunks,并用 Reranker 精排 + * @param {Set} l0Floors - L0 命中的楼层 + * @param {Array} l0Atoms - L0 atoms(用于构建虚拟 chunks) + * @param {string} queryText - 查询文本(用于 rerank) + * @param {object} metrics - 指标对象 + * @returns {Promise} chunks 列表 + */ +async function getChunksFromL0Floors(l0Floors, l0Atoms, queryText, metrics) { + const { chatId } = getContext(); + if (!chatId || !l0Floors.size) { + return []; + } + + const floorArray = Array.from(l0Floors); + + // 从 DB 拉取 chunks + let dbChunks = []; + try { + dbChunks = await getChunksByFloors(chatId, floorArray); + } catch (e) { + xbLog.warn(MODULE_ID, '从 DB 拉取 chunks 失败', e); + } + + // 构建 L0 虚拟 chunks + const l0VirtualChunks = (l0Atoms || []).map(a => ({ + chunkId: `state-${a.atomId}`, + floor: a.floor, + chunkIdx: -1, + speaker: '📌', + isUser: false, + text: a.atom?.semantic || '', + similarity: a.similarity, + isL0: true, + _atom: a.atom, + })); + + // 合并所有 chunks + const allChunks = [...l0VirtualChunks, ...dbChunks.map(c => ({ + ...c, + isL0: false, + similarity: 0.5, + }))]; + + // ★ 更新 metrics - 候选规模(rerank 前) + if (metrics) { + metrics.l3.floorsFromL0 = floorArray.length; + metrics.l3.chunksInRange = allChunks.length; + metrics.l3.chunksInRangeByType = { + l0Virtual: l0VirtualChunks.length, + l1Real: dbChunks.length, + }; + } + + // 如果数量不超限,直接按楼层去重返回 + if (allChunks.length <= CONFIG.L3_MAX_TOTAL_CHUNKS) { + allChunks.sort((a, b) => (b.similarity || 0) - (a.similarity || 0)); + + const selected = sparseByFloor(allChunks, CONFIG.L3_MAX_CHUNKS_PER_FLOOR); + + // ★ 更新 metrics - 最终注入规模 + if (metrics) { + metrics.l3.rerankApplied = false; + metrics.l3.chunksSelected = selected.length; + metrics.l3.chunksSelectedByType = countChunksByType(selected); + } + + return selected; + } + + // ★ Reranker 精排 + const T_Rerank_Start = performance.now(); + + const reranked = await rerankChunks(queryText, allChunks, { + topN: CONFIG.RERANK_TOP_N, + minScore: CONFIG.RERANK_MIN_SCORE, + }); + + const rerankTime = Math.round(performance.now() - T_Rerank_Start); + + // 按楼层稀疏去重 + const selected = sparseByFloor(reranked, CONFIG.L3_MAX_CHUNKS_PER_FLOOR); + + // ★ 更新 metrics + if (metrics) { + metrics.l3.rerankApplied = true; + metrics.l3.beforeRerank = allChunks.length; + metrics.l3.afterRerank = reranked.length; + metrics.l3.chunksSelected = selected.length; + metrics.l3.chunksSelectedByType = countChunksByType(selected); + metrics.l3.rerankTime = rerankTime; + metrics.timing.l3Rerank = rerankTime; + + // rerank 分数分布(基于 selected) + const scores = selected.map(c => c._rerankScore || 0).filter(s => s > 0); + if (scores.length > 0) { + scores.sort((a, b) => a - b); + metrics.l3.rerankScoreDistribution = { + min: Number(scores[0].toFixed(3)), + max: Number(scores[scores.length - 1].toFixed(3)), + mean: Number((scores.reduce((a, b) => a + b, 0) / scores.length).toFixed(3)), + }; + } + } + + xbLog.info(MODULE_ID, `L3 Rerank: ${allChunks.length} → ${reranked.length} → ${selected.length} (${rerankTime}ms)`); + + return selected; +} + +// ═══════════════════════════════════════════════════════════════════════════ +// L2 检索:Query → Events(独立) +// ═══════════════════════════════════════════════════════════════════════════ + +/** + * L2 事件向量检索 + * @param {Array} queryVector - 查询向量 + * @param {Array} allEvents - 所有事件 + * @param {object} vectorConfig - 向量配置 + * @param {Array} focusEntities - 焦点实体(用于实体过滤) + * @param {object} metrics - 指标对象 + * @returns {Promise} 事件列表 + */ +async function searchL2Events(queryVector, allEvents, vectorConfig, focusEntities, metrics) { + const { chatId } = getContext(); + if (!chatId || !queryVector?.length || !allEvents?.length) { + return []; + } + + // 检查 fingerprint + const meta = await getMeta(chatId); + const fp = getEngineFingerprint(vectorConfig); + if (meta.fingerprint && meta.fingerprint !== fp) { + xbLog.warn(MODULE_ID, 'L2 fingerprint 不匹配'); + return []; + } + + // 获取事件向量 + const eventVectors = await getAllEventVectors(chatId); + const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector])); + + if (!vectorMap.size) { + return []; + } + + // 实体匹配集合 + const focusSet = new Set((focusEntities || []).map(normalize)); + + // 计算相似度 + const scored = allEvents.map(event => { + const v = vectorMap.get(event.id); + const baseSim = v ? cosineSimilarity(queryVector, v) : 0; + + // 实体命中检查 + const participants = (event.participants || []).map(p => normalize(p)); + const hasEntityMatch = participants.some(p => focusSet.has(p)); + + // 实体匹配加权 + const bonus = hasEntityMatch ? 0.05 : 0; + + return { + _id: event.id, + event, + similarity: baseSim + bonus, + _baseSim: baseSim, + _hasEntityMatch: hasEntityMatch, + vector: v, + }; + }); + + // 更新 metrics + if (metrics) { + metrics.l2.eventsInStore = allEvents.length; + } + + // 阈值过滤 + let candidates = scored + .filter(s => s.similarity >= CONFIG.L2_MIN_SIMILARITY) + .sort((a, b) => b.similarity - a.similarity) + .slice(0, CONFIG.L2_CANDIDATE_MAX); + + if (metrics) { + metrics.l2.eventsConsidered = candidates.length; + } + + // 实体过滤(可选) + if (focusSet.size > 0) { + const beforeFilter = candidates.length; + + candidates = candidates.filter(c => { + // 高相似度绕过 + if (c.similarity >= 0.85) return true; + // 有实体匹配的保留 + return c._hasEntityMatch; + }); + + if (metrics) { + metrics.l2.entityFilterStats = { + focusEntities: focusEntities || [], + before: beforeFilter, + after: candidates.length, + filtered: beforeFilter - candidates.length, + }; + } + } + + // MMR 去重 + const selected = mmrSelect( + candidates, + CONFIG.L2_SELECT_MAX, + CONFIG.L2_MMR_LAMBDA, + c => c.vector, + c => c.similarity + ); + + // 统计召回类型 + let directCount = 0; + let contextCount = 0; + + const results = selected.map(s => { + const recallType = s._hasEntityMatch ? 'DIRECT' : 'SIMILAR'; + if (recallType === 'DIRECT') directCount++; + else contextCount++; + + return { + event: s.event, + similarity: s.similarity, + _recallType: recallType, + _baseSim: s._baseSim, + }; + }); + + // 更新 metrics + if (metrics) { + metrics.l2.eventsSelected = results.length; + metrics.l2.byRecallType = { direct: directCount, context: contextCount, causal: 0 }; + metrics.l2.similarityDistribution = calcSimilarityStats(results.map(r => r.similarity)); + } + + return results; +} + // ═══════════════════════════════════════════════════════════════════════════ // 因果链追溯 // ═══════════════════════════════════════════════════════════════════════════ +/** + * 构建事件索引 + * @param {Array} allEvents - 所有事件 + * @returns {Map} 事件索引 + */ function buildEventIndex(allEvents) { const map = new Map(); for (const e of allEvents || []) { @@ -143,9 +569,17 @@ function buildEventIndex(allEvents) { return map; } +/** + * 追溯因果祖先 + * @param {Array} recalledEvents - 召回的事件 + * @param {Map} eventIndex - 事件索引 + * @param {number} maxDepth - 最大深度 + * @returns {object} {results, maxDepth} + */ function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) { const out = new Map(); const idRe = /^evt-\d+$/; + let maxActualDepth = 0; function visit(parentId, depth, chainFrom) { if (depth > maxDepth) return; @@ -154,6 +588,8 @@ function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUS const ev = eventIndex.get(parentId); if (!ev) return; + if (depth > maxActualDepth) maxActualDepth = depth; + const existed = out.get(parentId); if (!existed) { out.set(parentId, { event: ev, depth, chainFrom: [chainFrom] }); @@ -175,404 +611,48 @@ function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUS } } - return Array.from(out.values()) + const results = Array.from(out.values()) .sort((a, b) => { const refDiff = b.chainFrom.length - a.chainFrom.length; if (refDiff !== 0) return refDiff; return a.depth - b.depth; }) .slice(0, CONFIG.CAUSAL_INJECT_MAX); + + return { results, maxDepth: maxActualDepth }; } // ═══════════════════════════════════════════════════════════════════════════ -// Query 构建 +// 辅助函数 // ═══════════════════════════════════════════════════════════════════════════ -function getLastRounds(chat, roundCount = 3, excludeLastAi = false) { +/** + * 获取最近的消息 + * @param {Array} chat - 聊天数组 + * @param {number} count - 消息数量 + * @param {boolean} excludeLastAi - 是否排除最后一条 AI 消息 + * @returns {Array} 消息列表 + */ +function getLastMessages(chat, count = 4, excludeLastAi = false) { if (!chat?.length) return []; let messages = [...chat]; + + // 排除最后一条 AI 消息(swipe/regenerate 场景) if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) { messages = messages.slice(0, -1); } - const result = []; - let rounds = 0; - - for (let i = messages.length - 1; i >= 0 && rounds < roundCount; i--) { - result.unshift(messages[i]); - if (messages[i]?.is_user) rounds++; - } - - return result; + return messages.slice(-count); } -// ═══════════════════════════════════════════════════════════════════════════ -// L2 Events 检索(纯向量) -// ═══════════════════════════════════════════════════════════════════════════ - -async function searchEvents(queryVector, allEvents, vectorConfig, entitySet, l0FloorBonus) { - const { chatId } = getContext(); - if (!chatId || !queryVector?.length) return []; - - const meta = await getMeta(chatId); - const fp = getEngineFingerprint(vectorConfig); - if (meta.fingerprint && meta.fingerprint !== fp) return []; - - const eventVectors = await getAllEventVectors(chatId); - const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector])); - if (!vectorMap.size) return []; - - // 向量检索 - const scored = (allEvents || []).map(event => { - const v = vectorMap.get(event.id); - const rawSim = v ? cosineSimilarity(queryVector, v) : 0; - - // L0 加权 - let bonus = 0; - const range = parseFloorRange(event.summary); - if (range) { - for (let f = range.start; f <= range.end; f++) { - if (l0FloorBonus.has(f)) { - bonus += l0FloorBonus.get(f); - break; - } - } - } - - // 实体命中加分 - const participants = (event.participants || []).map(p => normalize(p)); - const hasEntity = participants.some(p => entitySet.has(p)); - if (hasEntity) bonus += 0.05; - - return { - _id: event.id, - event, - similarity: rawSim + bonus, - _rawSim: rawSim, - _hasEntity: hasEntity, - vector: v, - }; - }); - - // 过滤 + 排序 - const candidates = scored - .filter(s => s.similarity >= CONFIG.MIN_SIMILARITY_EVENT) - .sort((a, b) => b.similarity - a.similarity) - .slice(0, CONFIG.CANDIDATE_EVENTS); - - // MMR 去重 - const selected = mmrSelect( - candidates, - CONFIG.MAX_EVENTS, - CONFIG.MMR_LAMBDA, - c => c.vector, - c => c.similarity - ); - - return selected.map(s => ({ - event: s.event, - similarity: s.similarity, - _recallType: s._hasEntity ? 'DIRECT' : 'SIMILAR', - _rawSim: s._rawSim, - })); -} - -// ═══════════════════════════════════════════════════════════════════════════ -// L1 Chunks 检索(纯向量) -// ═══════════════════════════════════════════════════════════════════════════ - -async function searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor) { - const { chatId } = getContext(); - if (!chatId || !queryVector?.length) return []; - - const meta = await getMeta(chatId); - const fp = getEngineFingerprint(vectorConfig); - if (meta.fingerprint && meta.fingerprint !== fp) return []; - - const chunkVectors = await getAllChunkVectors(chatId); - if (!chunkVectors.length) return []; - - // 向量检索 - const scored = chunkVectors.map(cv => { - const match = String(cv.chunkId).match(/c-(\d+)-(\d+)/); - const floor = match ? parseInt(match[1], 10) : 0; - const baseSim = cosineSimilarity(queryVector, cv.vector); - const l0Bonus = l0FloorBonus.get(floor) || 0; - - return { - _id: cv.chunkId, - chunkId: cv.chunkId, - floor, - chunkIdx: match ? parseInt(match[2], 10) : 0, - similarity: baseSim + l0Bonus, - _baseSim: baseSim, - vector: cv.vector, - }; - }); - - // 过滤(近期区域用更低阈值) - const candidates = scored - .filter(s => { - const threshold = s.floor > lastSummarizedFloor - ? CONFIG.MIN_SIMILARITY_CHUNK_RECENT - : CONFIG.MIN_SIMILARITY_CHUNK; - return s.similarity >= threshold; - }) - .sort((a, b) => b.similarity - a.similarity) - .slice(0, CONFIG.CANDIDATE_CHUNKS); - - // MMR 去重 - const selected = mmrSelect( - candidates, - CONFIG.MAX_CHUNKS, - CONFIG.MMR_LAMBDA, - c => c.vector, - c => c.similarity - ); - - // 每楼层稀疏 - const bestByFloor = new Map(); - for (const s of selected) { - const prev = bestByFloor.get(s.floor); - if (!prev || s.similarity > prev.similarity) { - bestByFloor.set(s.floor, s); - } - } - - const sparse = Array.from(bestByFloor.values()).sort((a, b) => b.similarity - a.similarity); - - // 获取完整 chunk 数据 - const floors = [...new Set(sparse.map(c => c.floor))]; - const chunks = await getChunksByFloors(chatId, floors); - const chunkMap = new Map(chunks.map(c => [c.chunkId, c])); - - return sparse.map(item => { - const chunk = chunkMap.get(item.chunkId); - if (!chunk) return null; - return { - chunkId: item.chunkId, - floor: item.floor, - chunkIdx: item.chunkIdx, - speaker: chunk.speaker, - isUser: chunk.isUser, - text: chunk.text, - similarity: item.similarity, - }; - }).filter(Boolean); -} - -// ═══════════════════════════════════════════════════════════════════════════ -// 日志格式化 -// ═══════════════════════════════════════════════════════════════════════════ - -function formatRecallLog({ elapsed, expansion, l0Results, chunkResults, eventResults, causalEvents }) { - const lines = [ - '╔══════════════════════════════════════════════════════════════╗', - '║ 记忆召回报告 (v2) ║', - '╠══════════════════════════════════════════════════════════════╣', - `║ 总耗时: ${elapsed}ms `, - '╚══════════════════════════════════════════════════════════════╝', - '', - '┌─────────────────────────────────────────────────────────────┐', - '│ 【Query Expansion】LLM 语义翻译 │', - '└─────────────────────────────────────────────────────────────┘', - ]; - - if (expansion) { - if (expansion.entities?.length) { - lines.push(` 实体: ${expansion.entities.join(' | ')}`); - } - if (expansion.implicit?.length) { - lines.push(` 隐含: ${expansion.implicit.join(' | ')}`); - } - if (expansion.queries?.length) { - lines.push(` 短句: ${expansion.queries.join(' | ')}`); - } - } else { - lines.push(' (未启用或失败)'); - } - - lines.push(''); - lines.push('┌─────────────────────────────────────────────────────────────┐'); - lines.push('│ 【召回统计】 │'); - lines.push('└─────────────────────────────────────────────────────────────┘'); - - // L0 - const l0Floors = [...new Set((l0Results || []).map(r => r.floor))].sort((a, b) => a - b); - lines.push(` L0 Atoms: ${l0Results?.length || 0} 条`); - if (l0Floors.length) { - lines.push(` 影响楼层: ${l0Floors.slice(0, 10).join(', ')}${l0Floors.length > 10 ? '...' : ''}`); - } - - // L1 - lines.push(` L1 Chunks: ${chunkResults?.length || 0} 条`); - - // L2 - const directCount = (eventResults || []).filter(e => e._recallType === 'DIRECT').length; - const similarCount = (eventResults || []).filter(e => e._recallType === 'SIMILAR').length; - lines.push(` L2 Events: ${eventResults?.length || 0} 条 (实体命中: ${directCount}, 相似: ${similarCount})`); - - // 因果链 - if (causalEvents?.length) { - lines.push(` 因果链: ${causalEvents.length} 条`); - } - - // Top Events - if (eventResults?.length) { - lines.push(''); - lines.push('┌─────────────────────────────────────────────────────────────┐'); - lines.push('│ 【Top 5 Events】 │'); - lines.push('└─────────────────────────────────────────────────────────────┘'); - - eventResults.slice(0, 5).forEach((e, i) => { - const ev = e.event || {}; - const title = (ev.title || '').slice(0, 20).padEnd(20); - const sim = (e.similarity || 0).toFixed(2); - const type = e._recallType === 'DIRECT' ? '⭐' : '○'; - lines.push(` ${i + 1}. ${type} ${title} sim=${sim}`); - }); - } - - lines.push(''); - return lines.join('\n'); -} - -// ═══════════════════════════════════════════════════════════════════════════ -// 主函数 -// ═══════════════════════════════════════════════════════════════════════════ - -export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) { - const T0 = performance.now(); - const { chat } = getContext(); - const store = getSummaryStore(); - const lastSummarizedFloor = store?.lastSummarizedMesId ?? -1; - const { pendingUserMessage = null, excludeLastAi = false } = options; - - if (!allEvents?.length) { - return { events: [], chunks: [], elapsed: 0, logText: 'No events.' }; - } - - // ═══════════════════════════════════════════════════════════════════════ - // Step 1: Query Expansion(LLM 语义翻译) - // ═══════════════════════════════════════════════════════════════════════ - - const lastRounds = getLastRounds(chat, 3, excludeLastAi); - if (pendingUserMessage) { - lastRounds.push({ is_user: true, mes: pendingUserMessage }); - } - - let expansion = { entities: [], implicit: [], queries: [] }; - try { - expansion = await expandQueryCached(lastRounds, { timeout: 6000 }); - xbLog.info(MODULE_ID, `Query Expansion: e=${expansion.entities.length} i=${expansion.implicit.length} q=${expansion.queries.length}`); - } catch (e) { - xbLog.warn(MODULE_ID, 'Query Expansion 失败,降级使用原始文本', e); - } - - const searchText = buildSearchText(expansion); - const finalSearchText = searchText || lastRounds.map(m => filterText(m.mes || '').slice(0, 200)).join(' '); - - // ═══════════════════════════════════════════════════════════════════════ - // Step 2: 向量化 - // ═══════════════════════════════════════════════════════════════════════ - - let queryVector; - try { - const [vec] = await embed([finalSearchText], { timeout: 10000 }); - queryVector = vec; - } catch (e) { - xbLog.error(MODULE_ID, '向量化失败', e); - return { events: [], chunks: [], elapsed: Math.round(performance.now() - T0), logText: 'Embedding failed.' }; - } - - if (!queryVector?.length) { - return { events: [], chunks: [], elapsed: Math.round(performance.now() - T0), logText: 'Empty query vector.' }; - } - - // ═══════════════════════════════════════════════════════════════════════ - // Step 3: L0 召回 - // ═══════════════════════════════════════════════════════════════════════ - - let l0Results = []; - let l0FloorBonus = new Map(); - let l0VirtualChunks = []; - - try { - l0Results = await searchStateAtoms(queryVector, vectorConfig); - l0FloorBonus = buildL0FloorBonus(l0Results, CONFIG.L0_FLOOR_BONUS_FACTOR); - l0VirtualChunks = stateToVirtualChunks(l0Results); - } catch (e) { - xbLog.warn(MODULE_ID, 'L0 召回失败', e); - } - - // ═══════════════════════════════════════════════════════════════════════ - // Step 4: L1 + L2 召回(并行) - // ═══════════════════════════════════════════════════════════════════════ - - const entitySet = new Set((expansion.entities || []).map(normalize)); - - const [chunkResults, eventResults] = await Promise.all([ - searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor), - searchEvents(queryVector, allEvents, vectorConfig, entitySet, l0FloorBonus), - ]); - - // 合并 L0 虚拟 chunks 和 L1 chunks - const mergedChunks = mergeAndSparsify(l0VirtualChunks, chunkResults, CONFIG.FLOOR_MAX_CHUNKS); - - // ═══════════════════════════════════════════════════════════════════════ - // Step 5: 因果链追溯 - // ═══════════════════════════════════════════════════════════════════════ - - const eventIndex = buildEventIndex(allEvents); - const causalMap = traceCausalAncestors(eventResults, eventIndex); - - const recalledIdSet = new Set(eventResults.map(x => x?.event?.id).filter(Boolean)); - const causalEvents = causalMap - .filter(x => x?.event?.id && !recalledIdSet.has(x.event.id)) - .map(x => ({ - event: x.event, - similarity: 0, - _recallType: 'CAUSAL', - _causalDepth: x.depth, - chainFrom: x.chainFrom, - })); - - // ═══════════════════════════════════════════════════════════════════════ - // 返回 - // ═══════════════════════════════════════════════════════════════════════ - - const elapsed = Math.round(performance.now() - T0); - const logText = formatRecallLog({ - elapsed, - expansion, - l0Results, - chunkResults: mergedChunks, - eventResults, - causalEvents, - }); - - console.group('%c[Recall v2]', 'color: #7c3aed; font-weight: bold'); - console.log(`Elapsed: ${elapsed}ms`); - console.log(`Expansion: ${expansion.entities.join(', ')} | ${expansion.implicit.join(', ')}`); - console.log(`L0: ${l0Results.length} | L1: ${mergedChunks.length} | L2: ${eventResults.length} | Causal: ${causalEvents.length}`); - console.groupEnd(); - - return { - events: eventResults, - causalEvents, - chunks: mergedChunks, - expansion, - queryEntities: expansion.entities, - elapsed, - logText, - }; -} - -// ═══════════════════════════════════════════════════════════════════════════ -// 辅助导出 -// ═══════════════════════════════════════════════════════════════════════════ - +/** + * 构建查询文本(降级用) + * @param {Array} chat - 聊天数组 + * @param {number} count - 消息数量 + * @param {boolean} excludeLastAi - 是否排除最后一条 AI 消息 + * @returns {string} 查询文本 + */ export function buildQueryText(chat, count = 2, excludeLastAi = false) { if (!chat?.length) return ''; @@ -587,3 +667,171 @@ export function buildQueryText(chat, count = 2, excludeLastAi = false) { return `${speaker}: ${text.slice(0, 500)}`; }).filter(Boolean).join('\n'); } + +// ═══════════════════════════════════════════════════════════════════════════ +// 主函数 +// ═══════════════════════════════════════════════════════════════════════════ + +/** + * 记忆召回主函数 + * @param {string} queryText - 查询文本(降级用) + * @param {Array} allEvents - 所有事件 + * @param {object} vectorConfig - 向量配置 + * @param {object} options - 选项 + * @returns {Promise} 召回结果 + */ +export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) { + const T0 = performance.now(); + const { chat, name1 } = getContext(); + const { pendingUserMessage = null, excludeLastAi = false } = options; + + const metrics = createMetrics(); + + if (!allEvents?.length) { + metrics.l0.needRecall = false; + return { events: [], chunks: [], causalEvents: [], focusEntities: [], elapsed: 0, logText: 'No events.', metrics }; + } + + // ═══════════════════════════════════════════════════════════════════════ + // Step 1: Query Expansion + // ═══════════════════════════════════════════════════════════════════════ + + const T_QE_Start = performance.now(); + + // 获取最近对话 + const lastMessages = getLastMessages(chat, 4, excludeLastAi); + + let expansion = { focus: [], queries: [] }; + try { + expansion = await expandQueryCached(lastMessages, { + pendingUserMessage, + timeout: CONFIG.QUERY_EXPANSION_TIMEOUT, + }); + xbLog.info(MODULE_ID, `Query Expansion: focus=[${expansion.focus.join(',')}] queries=${expansion.queries.length}`); + } catch (e) { + xbLog.warn(MODULE_ID, 'Query Expansion 失败,降级使用原始文本', e); + } + + // 构建检索文本 + const searchText = buildSearchText(expansion); + const finalSearchText = searchText || queryText || lastMessages.map(m => cleanForRecall(m.mes || '').slice(0, 200)).join(' '); + + // focusEntities(移除用户名) + const focusEntities = removeUserNameFromFocus(expansion.focus, name1); + + // 更新 L0 metrics + metrics.l0.needRecall = true; + metrics.l0.focusEntities = focusEntities; + metrics.l0.queries = expansion.queries || []; + metrics.l0.queryExpansionTime = Math.round(performance.now() - T_QE_Start); + metrics.timing.queryExpansion = metrics.l0.queryExpansionTime; + + // ═══════════════════════════════════════════════════════════════════════ + // Step 2: 向量化查询 + // ═══════════════════════════════════════════════════════════════════════ + + let queryVector; + try { + const [vec] = await embed([finalSearchText], vectorConfig, { timeout: 10000 }); + queryVector = vec; + } catch (e) { + xbLog.error(MODULE_ID, '向量化失败', e); + metrics.timing.total = Math.round(performance.now() - T0); + return { events: [], chunks: [], causalEvents: [], focusEntities, elapsed: metrics.timing.total, logText: 'Embedding failed.', metrics }; + } + + if (!queryVector?.length) { + metrics.timing.total = Math.round(performance.now() - T0); + return { events: [], chunks: [], causalEvents: [], focusEntities, elapsed: metrics.timing.total, logText: 'Empty query vector.', metrics }; + } + + // ═══════════════════════════════════════════════════════════════════════ + // Step 3: L0 检索 → L3 拉取(并行准备) + // ═══════════════════════════════════════════════════════════════════════ + + const T_L0_Start = performance.now(); + + const { atoms: l0Atoms, floors: l0Floors } = await searchL0(queryVector, vectorConfig, metrics); + + metrics.timing.l0Search = Math.round(performance.now() - T_L0_Start); + + // ═══════════════════════════════════════════════════════════════════════ + // Step 4: L3 从 L0 楼层拉取(带 Rerank) + // ═══════════════════════════════════════════════════════════════════════ + + const T_L3_Start = performance.now(); + + // 构建 rerank 用的查询文本 + const rerankQuery = buildRerankQuery(expansion, lastMessages, pendingUserMessage); + + const chunks = await getChunksFromL0Floors(l0Floors, l0Atoms, rerankQuery, metrics); + + metrics.timing.l3Retrieval = Math.round(performance.now() - T_L3_Start); + + // ═══════════════════════════════════════════════════════════════════════ + // Step 5: L2 独立检索 + // ═══════════════════════════════════════════════════════════════════════ + + const T_L2_Start = performance.now(); + + const eventResults = await searchL2Events(queryVector, allEvents, vectorConfig, focusEntities, metrics); + + metrics.timing.l2Retrieval = Math.round(performance.now() - T_L2_Start); + + // ═══════════════════════════════════════════════════════════════════════ + // Step 6: 因果链追溯 + // ═══════════════════════════════════════════════════════════════════════ + + const eventIndex = buildEventIndex(allEvents); + const { results: causalMap, maxDepth: causalMaxDepth } = traceCausalAncestors(eventResults, eventIndex); + + const recalledIdSet = new Set(eventResults.map(x => x?.event?.id).filter(Boolean)); + const causalEvents = causalMap + .filter(x => x?.event?.id && !recalledIdSet.has(x.event.id)) + .map(x => ({ + event: x.event, + similarity: 0, + _recallType: 'CAUSAL', + _causalDepth: x.depth, + chainFrom: x.chainFrom, + })); + + // 更新因果链 metrics + if (metrics.l2.byRecallType) { + metrics.l2.byRecallType.causal = causalEvents.length; + } + metrics.l2.causalChainDepth = causalMaxDepth; + metrics.l2.causalEventsCount = causalEvents.length; + + // ═══════════════════════════════════════════════════════════════════════ + // 完成 + // ═══════════════════════════════════════════════════════════════════════ + + metrics.timing.total = Math.round(performance.now() - T0); + + // 实体信息 + metrics.l2.entityNames = focusEntities; + metrics.l2.entitiesLoaded = focusEntities.length; + + // 日志 + console.group('%c[Recall v3]', 'color: #7c3aed; font-weight: bold'); + console.log(`Elapsed: ${metrics.timing.total}ms`); + console.log(`Query Expansion: focus=[${expansion.focus.join(', ')}]`); + console.log(`L0: ${l0Atoms.length} atoms → ${l0Floors.size} floors`); + console.log(`L3: ${chunks.length} chunks (L0=${metrics.l3.chunksSelectedByType?.l0Virtual || 0}, DB=${metrics.l3.chunksSelectedByType?.l1Real || 0})`); + if (metrics.l3.rerankApplied) { + console.log(`L3 Rerank: ${metrics.l3.beforeRerank} → ${metrics.l3.afterRerank} (${metrics.l3.rerankTime}ms)`); + } + console.log(`L2: ${eventResults.length} events, ${causalEvents.length} causal`); + console.groupEnd(); + + return { + events: eventResults, + causalEvents, + chunks, + expansion, + focusEntities, + elapsed: metrics.timing.total, + metrics, + }; +} diff --git a/modules/streaming-generation.js b/modules/streaming-generation.js index 7d15708..a134f61 100644 --- a/modules/streaming-generation.js +++ b/modules/streaming-generation.js @@ -166,6 +166,7 @@ class StreamingGeneration { if (!source) throw new Error(`不支持的 api: ${opts.api}`); const model = String(opts.model || '').trim(); + const msgCount = Array.isArray(messages) ? messages.length : null; if (!model) { try { xbLog.error('streamingGeneration', 'missing model', null); } catch {} @@ -175,7 +176,6 @@ class StreamingGeneration { try { try { if (xbLog.isEnabled?.()) { - const msgCount = Array.isArray(messages) ? messages.length : null; xbLog.info('streamingGeneration', `callAPI stream=${!!stream} api=${String(opts.api || '')} model=${model} messages=${msgCount ?? '-'}`); } } catch {} @@ -286,10 +286,34 @@ class StreamingGeneration { } + const logSendRequestError = (err, streamMode) => { + if (err?.name !== 'AbortError') { + const safeApiUrl = String(cmdApiUrl || reverseProxy || oai_settings?.custom_url || '').trim(); + try { + xbLog.error('streamingGeneration', 'sendRequest failed', { + message: err?.message || String(err), + name: err?.name, + stream: !!streamMode, + api: String(opts.api || ''), + model, + msgCount, + apiurl: safeApiUrl, + }); + } catch {} + console.error('[xbgen:callAPI] sendRequest failed:', err); + } + }; + if (stream) { const payload = ChatCompletionService.createRequestData(body); - const streamFactory = await ChatCompletionService.sendRequest(payload, false, abortSignal); + let streamFactory; + try { + streamFactory = await ChatCompletionService.sendRequest(payload, false, abortSignal); + } catch (err) { + logSendRequestError(err, true); + throw err; + } const generator = (typeof streamFactory === 'function') ? streamFactory() : streamFactory; @@ -350,7 +374,13 @@ class StreamingGeneration { })(); } else { const payload = ChatCompletionService.createRequestData(body); - const extracted = await ChatCompletionService.sendRequest(payload, false, abortSignal); + let extracted; + try { + extracted = await ChatCompletionService.sendRequest(payload, false, abortSignal); + } catch (err) { + logSendRequestError(err, false); + throw err; + } let result = ''; if (extracted && typeof extracted === 'object') {