From 816196a71083c5deb55a7ae49769ea68dba82ba6 Mon Sep 17 00:00:00 2001 From: bielie Date: Wed, 11 Feb 2026 17:21:04 +0800 Subject: [PATCH] feat(recall): clamp focus weight and adjust pending context window --- .../story-summary/vector/retrieval/metrics.js | 115 +++-- .../vector/retrieval/query-builder.js | 338 ++++++++------ .../story-summary/vector/retrieval/recall.js | 418 ++++++++++++------ 3 files changed, 544 insertions(+), 327 deletions(-) diff --git a/modules/story-summary/vector/retrieval/metrics.js b/modules/story-summary/vector/retrieval/metrics.js index 527d6d1..5d56ee9 100644 --- a/modules/story-summary/vector/retrieval/metrics.js +++ b/modules/story-summary/vector/retrieval/metrics.js @@ -1,16 +1,12 @@ // ═══════════════════════════════════════════════════════════════════════════ -// Story Summary - Metrics Collector (v4 - Two-Stage: L0 Locate → L1 Evidence) +// Story Summary - Metrics Collector (v5 - Weighted Query + Floor Aggregation) // -// 命名规范: -// - 存储层用 L0/L1/L2/L3(StateAtom/Chunk/Event/Fact) -// - 指标层用语义名称:anchor/evidence/event/constraint/arc -// -// 架构变更(v3 → v4): -// - evidence 区块反映 L0-only 融合 + L1 按楼层拉取的两阶段架构 -// - 删除 mergedByType / selectedByType(不再有混合池) -// - 新增 floorCandidates / floorsSelected / l0Collected / l1Pulled / l1Attached / l1CosineTime -// - fusion 区块明确标注 L0-only(删除 anchorCount) -// - quality.chunkRealRatio → quality.l1AttachRate +// v4 → v5 变更: +// - query: 新增 segmentWeights / r2Weights(加权向量诊断) +// - fusion: 新增 denseAggMethod / lexDensityBonus(聚合策略可观测) +// - quality: 新增 rerankRetentionRate(粗排-精排一致性) +// - 移除 timing 中从未写入的死字段(queryBuild/queryRefine/lexicalSearch/fusion) +// - 移除从未写入的 arc 区块 // ═══════════════════════════════════════════════════════════════════════════ /** @@ -25,9 +21,11 @@ export function createMetrics() { refineTime: 0, lengths: { v0Chars: 0, - v1Chars: null, // null = NA + v1Chars: null, // null = 无 hints rerankChars: 0, }, + segmentWeights: [], // R1 归一化后权重 [context..., focus] + r2Weights: null, // R2 归一化后权重 [context..., focus, hints](null = 无 hints) }, // Anchor (L0 StateAtoms) - 语义锚点 @@ -55,6 +53,8 @@ export function createMetrics() { totalUnique: 0, afterCap: 0, time: 0, + denseAggMethod: '', // 聚合方法描述(如 "max×0.6+mean×0.4") + lexDensityBonus: 0, // 密度加成系数 }, // Constraint (L3 Facts) - 世界约束 @@ -83,34 +83,28 @@ export function createMetrics() { // Evidence (Two-Stage: Floor rerank → L1 pull) - 原文证据 evidence: { // Stage 1: Floor - floorCandidates: 0, // W-RRF 融合后的 floor 候选数 - floorsSelected: 0, // rerank 后选中的 floor 数 - l0Collected: 0, // 选中 floor 中收集的 L0 atom 总数 + floorCandidates: 0, + floorsSelected: 0, + l0Collected: 0, rerankApplied: false, rerankFailed: false, beforeRerank: 0, afterRerank: 0, rerankTime: 0, rerankScores: null, - rerankDocAvgLength: 0, // rerank document 平均字符数 + rerankDocAvgLength: 0, // Stage 2: L1 - l1Pulled: 0, // 从 DB 拉取的 L1 chunk 总数 - l1Attached: 0, // 实际挂载的 L1 数(top-1 × floor × 2侧) - l1CosineTime: 0, // L1 cosine 打分耗时 + l1Pulled: 0, + l1Attached: 0, + l1CosineTime: 0, // 装配 - contextPairsAdded: 0, // USER 侧挂载数量 + contextPairsAdded: 0, tokens: 0, assemblyTime: 0, }, - // Arc - 人物弧光 - arc: { - injected: 0, - tokens: 0, - }, - // Formatting - 格式化 formatting: { sectionsIncluded: [], @@ -131,13 +125,9 @@ export function createMetrics() { }, }, - // Timing - 计时 + // Timing - 计时(仅包含实际写入的字段) timing: { - queryBuild: 0, - queryRefine: 0, anchorSearch: 0, - lexicalSearch: 0, - fusion: 0, constraintFilter: 0, eventRetrieval: 0, evidenceRetrieval: 0, @@ -151,7 +141,8 @@ export function createMetrics() { quality: { constraintCoverage: 100, eventPrecisionProxy: 0, - l1AttachRate: 0, // 有 L1 挂载的 floor 占比 + l1AttachRate: 0, + rerankRetentionRate: 0, potentialIssues: [], }, }; @@ -178,6 +169,16 @@ export function calcSimilarityStats(similarities) { }; } +/** + * 格式化权重数组为紧凑字符串 + * @param {number[]|null} weights + * @returns {string} + */ +function fmtWeights(weights) { + if (!weights?.length) return 'N/A'; + return '[' + weights.map(w => (typeof w === 'number' ? w.toFixed(3) : String(w))).join(', ') + ']'; +} + /** * 格式化指标为可读日志 * @param {object} metrics @@ -189,21 +190,27 @@ export function formatMetricsLog(metrics) { lines.push(''); lines.push('════════════════════════════════════════'); - lines.push(' Recall Metrics Report (v4) '); + lines.push(' Recall Metrics Report (v5) '); lines.push('════════════════════════════════════════'); lines.push(''); // Query Length lines.push('[Query Length] 查询长度'); lines.push(`├─ query_v0_chars: ${m.query?.lengths?.v0Chars ?? 0}`); - lines.push(`├─ query_v1_chars: ${m.query?.lengths?.v1Chars == null ? 'NA' : m.query.lengths.v1Chars}`); + lines.push(`├─ query_v1_chars: ${m.query?.lengths?.v1Chars == null ? 'N/A' : m.query.lengths.v1Chars}`); lines.push(`└─ rerank_query_chars: ${m.query?.lengths?.rerankChars ?? 0}`); lines.push(''); // Query Build lines.push('[Query] 查询构建'); lines.push(`├─ build_time: ${m.query.buildTime}ms`); - lines.push(`└─ refine_time: ${m.query.refineTime}ms`); + lines.push(`├─ refine_time: ${m.query.refineTime}ms`); + lines.push(`├─ r1_weights: ${fmtWeights(m.query.segmentWeights)}`); + if (m.query.r2Weights) { + lines.push(`└─ r2_weights: ${fmtWeights(m.query.r2Weights)}`); + } else { + lines.push(`└─ r2_weights: N/A (no hints)`); + } lines.push(''); // Anchor (L0 StateAtoms) @@ -228,7 +235,13 @@ export function formatMetricsLog(metrics) { // Fusion (W-RRF, floor-level) lines.push('[Fusion] W-RRF (floor-level) - 多路融合'); lines.push(`├─ dense_floors: ${m.fusion.denseFloors}`); + if (m.fusion.denseAggMethod) { + lines.push(`│ └─ aggregation: ${m.fusion.denseAggMethod}`); + } lines.push(`├─ lex_floors: ${m.fusion.lexFloors}`); + if (m.fusion.lexDensityBonus > 0) { + lines.push(`│ └─ density_bonus: ${m.fusion.lexDensityBonus}`); + } lines.push(`├─ total_unique: ${m.fusion.totalUnique}`); lines.push(`├─ after_cap: ${m.fusion.afterCap}`); lines.push(`└─ time: ${m.fusion.time}ms`); @@ -313,14 +326,6 @@ export function formatMetricsLog(metrics) { lines.push(`└─ assembly_time: ${m.evidence.assemblyTime}ms`); lines.push(''); - // Arc - if (m.arc.injected > 0) { - lines.push('[Arc] 人物弧光'); - lines.push(`├─ injected: ${m.arc.injected}`); - lines.push(`└─ tokens: ${m.arc.tokens}`); - lines.push(''); - } - // Formatting lines.push('[Formatting] 格式化'); lines.push(`├─ sections: [${(m.formatting.sectionsIncluded || []).join(', ')}]`); @@ -363,6 +368,7 @@ export function formatMetricsLog(metrics) { lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`); lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`); lines.push(`├─ l1_attach_rate: ${m.quality.l1AttachRate}%`); + lines.push(`├─ rerank_retention_rate: ${m.quality.rerankRetentionRate}%`); if (m.quality.potentialIssues && m.quality.potentialIssues.length > 0) { lines.push(`└─ potential_issues:`); @@ -398,6 +404,19 @@ export function detectIssues(metrics) { issues.push('No focus entities extracted - entity lexicon may be empty or messages too short'); } + // 权重极端退化检测 + const segWeights = m.query.segmentWeights || []; + if (segWeights.length > 0) { + const focusWeight = segWeights[segWeights.length - 1] || 0; + if (focusWeight < 0.15) { + issues.push(`Focus segment weight very low (${(focusWeight * 100).toFixed(0)}%) - focus message may be too short`); + } + const allLow = segWeights.every(w => w < 0.1); + if (allLow) { + issues.push('All segment weights below 10% - all messages may be extremely short'); + } + } + // ───────────────────────────────────────────────────────────────── // 锚点匹配问题 // ───────────────────────────────────────────────────────────────── @@ -494,6 +513,16 @@ export function detectIssues(metrics) { } } + // Rerank 保留率 + const retentionRate = m.evidence.floorCandidates > 0 + ? Math.round(m.evidence.floorsSelected / m.evidence.floorCandidates * 100) + : 0; + m.quality.rerankRetentionRate = retentionRate; + + if (m.evidence.floorCandidates > 0 && retentionRate < 25) { + issues.push(`Low rerank retention rate (${retentionRate}%) - fusion ranking poorly aligned with reranker`); + } + // ───────────────────────────────────────────────────────────────── // L1 挂载问题 // ───────────────────────────────────────────────────────────────── diff --git a/modules/story-summary/vector/retrieval/query-builder.js b/modules/story-summary/vector/retrieval/query-builder.js index 4f21b3e..ad38412 100644 --- a/modules/story-summary/vector/retrieval/query-builder.js +++ b/modules/story-summary/vector/retrieval/query-builder.js @@ -2,8 +2,18 @@ // query-builder.js - 确定性查询构建器(无 LLM) // // 职责: -// 1. 从最近消息 + 实体词典构建 QueryBundle_v0 -// 2. 用第一轮召回结果增强为 QueryBundle_v1 +// 1. 从最近 3 条消息构建 QueryBundle(加权向量段) +// 2. 用第一轮召回结果产出 hints 段用于 R2 增强 +// +// 加权向量设计: +// - 每条消息独立 embed,得到独立向量 +// - 按位置分配基础权重(焦点 > 近上下文 > 远上下文) +// - 短消息通过 lengthFactor 自动降权(下限 35%) +// - recall.js 负责 embed + 归一化 + 加权平均 +// +// 焦点确定: +// - pendingUserMessage 存在 → 它是焦点 +// - 否则 → lastMessages 最后一条是焦点 // // 不负责:向量化、检索、rerank // ═══════════════════════════════════════════════════════════════════════════ @@ -15,12 +25,30 @@ import { filterText } from '../utils/text-filter.js'; import { tokenizeForIndex as tokenizerTokenizeForIndex } from '../utils/tokenizer.js'; // ───────────────────────────────────────────────────────────────────────── -// 常量 +// 权重常量 +// ───────────────────────────────────────────────────────────────────────── + +// R1 基础权重:[...context(oldest→newest), focus] +// 焦点消息占 55%,最近上下文 30%,更早上下文 15% +export const FOCUS_BASE_WEIGHT = 0.55; +export const CONTEXT_BASE_WEIGHTS = [0.15, 0.30]; + +// R2 基础权重:焦点让权给 hints +export const FOCUS_BASE_WEIGHT_R2 = 0.45; +export const CONTEXT_BASE_WEIGHTS_R2 = [0.10, 0.20]; +export const HINTS_BASE_WEIGHT = 0.25; + +// 长度惩罚:< 50 字线性衰减,下限 35% +export const LENGTH_FULL_THRESHOLD = 50; +export const LENGTH_MIN_FACTOR = 0.35; +// 归一化后的焦点最小占比(由 recall.js 在归一化后硬保底) +// 语义:即使焦点文本很短,也不能被稀释到过低权重 +export const FOCUS_MIN_NORMALIZED_WEIGHT = 0.35; + +// ───────────────────────────────────────────────────────────────────────── +// 其他常量 // ───────────────────────────────────────────────────────────────────────── -// Zero-darkbox policy: -// - No internal truncation. We rely on model-side truncation / provider limits. -// - If provider rejects due to length, we fail loudly and degrade explicitly. const MEMORY_HINT_ATOMS_MAX = 5; const MEMORY_HINT_EVENTS_MAX = 3; const LEXICAL_TERMS_MAX = 10; @@ -41,14 +69,6 @@ function cleanMessageText(text) { .trim(); } -/** - * 截断文本到指定长度 - * @param {string} text - * @param {number} maxLen - * @returns {string} - */ -// truncate removed by design (zero-darkbox) - /** * 清理事件摘要(移除楼层标记) * @param {string} summary @@ -61,9 +81,23 @@ function cleanSummary(summary) { } /** - * 从文本中提取高频实词(用于词法检索) + * 计算长度因子 * - * 使用统一分词器(结巴 + 实体保护 + 停用词过滤),按频率排序 + * charCount >= 50 → 1.0 + * charCount = 0 → 0.35 + * 中间线性插值 + * + * @param {number} charCount - 清洗后内容字符数(不含 speaker 前缀) + * @returns {number} 0.35 ~ 1.0 + */ +export function computeLengthFactor(charCount) { + if (charCount >= LENGTH_FULL_THRESHOLD) return 1.0; + if (charCount <= 0) return LENGTH_MIN_FACTOR; + return LENGTH_MIN_FACTOR + (1.0 - LENGTH_MIN_FACTOR) * (charCount / LENGTH_FULL_THRESHOLD); +} + +/** + * 从文本中提取高频实词(用于词法检索) * * @param {string} text - 清洗后的文本 * @param {number} maxTerms - 最大词数 @@ -72,10 +106,7 @@ function cleanSummary(summary) { function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) { if (!text) return []; - // 使用统一分词器(索引用,不去重,保留词频) const tokens = tokenizerTokenizeForIndex(text); - - // 统计词频 const freq = new Map(); for (const token of tokens) { const key = String(token || '').toLowerCase(); @@ -89,172 +120,203 @@ function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) { .map(([term]) => term); } +// ───────────────────────────────────────────────────────────────────────── +// 类型定义 +// ───────────────────────────────────────────────────────────────────────── + /** - * 构建 rerank 专用查询(纯自然语言,不带结构标签) - * - * reranker(bge-reranker-v2-m3)的 query 应为自然语言文本, - * 不含 [ENTITIES] [DIALOGUE] 等结构标签。 - * - * @param {string[]} focusEntities - 焦点实体 - * @param {object[]} lastMessages - 最近 K 条消息 - * @param {string|null} pendingUserMessage - 待发送的用户消息 - * @param {object} context - { name1, name2 } - * @returns {string} + * @typedef {object} QuerySegment + * @property {string} text - 待 embed 的文本(含 speaker 前缀,纯自然语言) + * @property {number} baseWeight - R1 基础权重 + * @property {number} charCount - 内容字符数(不含 speaker 前缀,用于 lengthFactor) */ -function buildRerankQuery(focusEntities, lastMessages, pendingUserMessage, context) { - const parts = []; - - // 实体提示 - if (focusEntities.length > 0) { - parts.push(`关于${focusEntities.join('、')}。`); - } - - // 最近对话原文 - for (const m of (lastMessages || [])) { - const speaker = m.is_user ? (context.name1 || '用户') : (m.name || context.name2 || '角色'); - const clean = cleanMessageText(m.mes || ''); - if (clean) { - parts.push(`${speaker}:${clean}`); - } - } - - // 待发送消息 - if (pendingUserMessage) { - const clean = cleanMessageText(pendingUserMessage); - if (clean) { - parts.push(`${context.name1 || '用户'}:${clean}`); - } - } - - return parts.join('\n'); -} - -// ───────────────────────────────────────────────────────────────────────── -// QueryBundle 类型定义(JSDoc) -// ───────────────────────────────────────────────────────────────────────── /** * @typedef {object} QueryBundle - * @property {string[]} focusEntities - 焦点实体(原词形,已排除 name1) - * @property {string} queryText_v0 - 第一轮查询文本 - * @property {string|null} queryText_v1 - 第二轮查询文本(refinement 后填充) - * @property {string} rerankQuery - rerank 用的短查询 + * @property {QuerySegment[]} querySegments - R1 向量段(上下文 oldest→newest,焦点在末尾) + * @property {QuerySegment|null} hintsSegment - R2 hints 段(refinement 后填充) + * @property {string} rerankQuery - rerank 用的纯自然语言查询(焦点在前) * @property {string[]} lexicalTerms - MiniSearch 查询词 - * @property {Set} _lexicon - 实体词典(内部使用) + * @property {string[]} focusEntities - 焦点实体(原词形,已排除 name1) + * @property {Set} _lexicon - 实体词典(内部使用) * @property {Map} _displayMap - 标准化→原词形映射(内部使用) */ // ───────────────────────────────────────────────────────────────────────── -// 阶段 1:构建 QueryBundle_v0 +// 内部:消息条目构建 +// ───────────────────────────────────────────────────────────────────────── + +/** + * @typedef {object} MessageEntry + * @property {string} text - speaker:内容(完整文本) + * @property {number} charCount - 内容字符数(不含 speaker 前缀) + */ + +/** + * 清洗消息并构建条目 + * @param {object} message - chat 消息对象 + * @param {object} context - { name1, name2 } + * @returns {MessageEntry|null} + */ +function buildMessageEntry(message, context) { + if (!message?.mes) return null; + + const speaker = message.is_user + ? (context.name1 || '用户') + : (message.name || context.name2 || '角色'); + + const clean = cleanMessageText(message.mes); + if (!clean) return null; + + return { + text: `${speaker}:${clean}`, + charCount: clean.length, + }; +} + +// ───────────────────────────────────────────────────────────────────────── +// 阶段 1:构建 QueryBundle // ───────────────────────────────────────────────────────────────────────── /** * 构建初始查询包 * - * @param {object[]} lastMessages - 最近 K=2 条消息 + * 消息布局(K=3 时): + * msg[0] = USER(#N-2) 上下文 baseWeight = 0.15 + * msg[1] = AI(#N-1) 上下文 baseWeight = 0.30 + * msg[2] = USER(#N) 焦点 baseWeight = 0.55 + * + * 焦点确定: + * pendingUserMessage 存在 → 焦点,所有 lastMessages 为上下文 + * pendingUserMessage 不存在 → lastMessages[-1] 为焦点,其余为上下文 + * + * @param {object[]} lastMessages - 最近 K 条消息(由 recall.js 传入) * @param {string|null} pendingUserMessage - 用户刚输入但未进 chat 的消息 - * @param {object|null} store - getSummaryStore() 返回值(可选,内部会自动获取) - * @param {object|null} context - { name1, name2 }(可选,内部会自动获取) + * @param {object|null} store + * @param {object|null} context - { name1, name2 } * @returns {QueryBundle} */ export function buildQueryBundle(lastMessages, pendingUserMessage, store = null, context = null) { - // 自动获取 store 和 context if (!store) store = getSummaryStore(); if (!context) { const ctx = getContext(); context = { name1: ctx.name1, name2: ctx.name2 }; } - // 1. 构建实体词典 + // 1. 实体词典 const lexicon = buildEntityLexicon(store, context); const displayMap = buildDisplayNameMap(store, context); - // 2. 清洗消息文本 - const dialogueLines = []; - const allCleanText = []; + // 2. 分离焦点与上下文 + const contextEntries = []; + let focusEntry = null; + const allCleanTexts = []; - for (const m of (lastMessages || [])) { - const speaker = m.is_user ? (context.name1 || '用户') : (m.name || context.name2 || '角色'); - const clean = cleanMessageText(m.mes || ''); - - if (clean) { - // 不使用楼层号,embedding 模型不需要 - // 不截断,零暗箱 - dialogueLines.push(`${speaker}: ${clean}`); - allCleanText.push(clean); - } - } - - // 3. 处理 pendingUserMessage - let pendingClean = ''; if (pendingUserMessage) { - pendingClean = cleanMessageText(pendingUserMessage); + // pending 是焦点,所有 lastMessages 是上下文 + const pendingClean = cleanMessageText(pendingUserMessage); if (pendingClean) { - allCleanText.push(pendingClean); + const speaker = context.name1 || '用户'; + focusEntry = { + text: `${speaker}:${pendingClean}`, + charCount: pendingClean.length, + }; + allCleanTexts.push(pendingClean); + } + + for (const m of (lastMessages || [])) { + const entry = buildMessageEntry(m, context); + if (entry) { + contextEntries.push(entry); + allCleanTexts.push(cleanMessageText(m.mes)); + } + } + } else { + // 无 pending → lastMessages[-1] 是焦点 + const msgs = lastMessages || []; + + if (msgs.length > 0) { + const lastMsg = msgs[msgs.length - 1]; + const entry = buildMessageEntry(lastMsg, context); + if (entry) { + focusEntry = entry; + allCleanTexts.push(cleanMessageText(lastMsg.mes)); + } + } + + for (let i = 0; i < msgs.length - 1; i++) { + const entry = buildMessageEntry(msgs[i], context); + if (entry) { + contextEntries.push(entry); + allCleanTexts.push(cleanMessageText(msgs[i].mes)); + } } } - // 4. 提取焦点实体 - const combinedText = allCleanText.join(' '); + // 3. 提取焦点实体 + const combinedText = allCleanTexts.join(' '); const focusEntities = extractEntitiesFromText(combinedText, lexicon, displayMap); - // 5. 构建 queryText_v0 - const queryParts = []; + // 4. 构建 querySegments + // 上下文在前(oldest → newest),焦点在末尾 + // 上下文权重从 CONTEXT_BASE_WEIGHTS 尾部对齐分配 + const querySegments = []; - if (focusEntities.length > 0) { - queryParts.push(`[ENTITIES]\n${focusEntities.join('\n')}`); + for (let i = 0; i < contextEntries.length; i++) { + const weightIdx = Math.max(0, CONTEXT_BASE_WEIGHTS.length - contextEntries.length + i); + querySegments.push({ + text: contextEntries[i].text, + baseWeight: CONTEXT_BASE_WEIGHTS[weightIdx] || CONTEXT_BASE_WEIGHTS[0], + charCount: contextEntries[i].charCount, + }); } - if (dialogueLines.length > 0) { - queryParts.push(`[DIALOGUE]\n${dialogueLines.join('\n')}`); + if (focusEntry) { + querySegments.push({ + text: focusEntry.text, + baseWeight: FOCUS_BASE_WEIGHT, + charCount: focusEntry.charCount, + }); } - if (pendingClean) { - // 不截断,零暗箱 - queryParts.push(`[PENDING_USER]\n${pendingClean}`); - } + // 5. rerankQuery(焦点在前,纯自然语言,无前缀) + const contextLines = contextEntries.map(e => e.text); + const rerankQuery = focusEntry + ? [focusEntry.text, ...contextLines].join('\n') + : contextLines.join('\n'); - const queryText_v0 = queryParts.join('\n\n'); - - // 6. rerankQuery 独立构建(纯自然语言,供 reranker 使用) - const rerankQuery = buildRerankQuery(focusEntities, dialogueLines.length > 0 ? lastMessages : [], pendingUserMessage, context); - - // 7. 构建 lexicalTerms + // 6. lexicalTerms(实体优先 + 高频实词补充) const entityTerms = focusEntities.map(e => e.toLowerCase()); const textTerms = extractKeyTerms(combinedText); - - // 合并去重:实体优先 const termSet = new Set(entityTerms); for (const t of textTerms) { if (termSet.size >= LEXICAL_TERMS_MAX) break; termSet.add(t); } - const lexicalTerms = Array.from(termSet); - return { - focusEntities, - queryText_v0, - queryText_v1: null, + querySegments, + hintsSegment: null, rerankQuery, - lexicalTerms, + lexicalTerms: Array.from(termSet), + focusEntities, _lexicon: lexicon, _displayMap: displayMap, }; } // ───────────────────────────────────────────────────────────────────────── -// 阶段 3:Query Refinement(用第一轮召回结果增强) +// 阶段 3:Query Refinement(用第一轮召回结果产出 hints 段) // ───────────────────────────────────────────────────────────────────────── /** * 用第一轮召回结果增强 QueryBundle * * 原地修改 bundle: - * - queryText_v1 = queryText_v0 + [MEMORY_HINTS] - * - focusEntities 可能扩展(从 anchorHits 的 subject/object 中补充) - * - rerankQuery 追加 memory hints 关键词 - * - lexicalTerms 追加 memory hints 关键词 + * - hintsSegment:填充 hints 段(供 R2 加权使用) + * - focusEntities:可能从 anchor hits 的 subject/object 中扩展 + * - lexicalTerms:可能追加 hints 中的关键词 + * - rerankQuery:不变(保持焦点优先的纯自然语言) * * @param {QueryBundle} bundle - 原始查询包 * @param {object[]} anchorHits - 第一轮 L0 命中(按相似度降序) @@ -267,10 +329,7 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) { const topAnchors = (anchorHits || []).slice(0, MEMORY_HINT_ATOMS_MAX); for (const hit of topAnchors) { const semantic = hit.atom?.semantic || ''; - if (semantic) { - // 不截断,零暗箱 - hints.push(semantic); - } + if (semantic) hints.push(semantic); } // 2. 从 top eventHits 提取 memory hints @@ -282,18 +341,19 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) { const line = title && summary ? `${title}: ${summary}` : title || summary; - if (line) { - // 不截断,零暗箱 - hints.push(line); - } + if (line) hints.push(line); } - // 3. 构建 queryText_v1(Hints 前置,最优先) + // 3. 构建 hintsSegment if (hints.length > 0) { - const hintText = `[MEMORY_HINTS]\n${hints.join('\n')}`; - bundle.queryText_v1 = hintText + `\n\n` + bundle.queryText_v0; + const hintsText = hints.join('\n'); + bundle.hintsSegment = { + text: hintsText, + baseWeight: HINTS_BASE_WEIGHT, + charCount: hintsText.length, + }; } else { - bundle.queryText_v1 = bundle.queryText_v0; + bundle.hintsSegment = null; } // 4. 从 anchorHits 补充 focusEntities @@ -307,10 +367,13 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) { const atom = hit.atom; if (!atom) continue; - // 检查 subject 和 object for (const field of [atom.subject, atom.object]) { if (!field) continue; - const norm = String(field).normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim().toLowerCase(); + const norm = String(field) + .normalize('NFKC') + .replace(/[\u200B-\u200D\uFEFF]/g, '') + .trim() + .toLowerCase(); if (norm.length >= 2 && lexicon.has(norm) && !existingSet.has(norm)) { existingSet.add(norm); const display = displayMap?.get(norm) || field; @@ -320,8 +383,9 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) { } } - // 5. rerankQuery 保持独立(不随 refinement 变更) - // reranker 需要纯自然语言 query,不受 memory hints 干扰 + // 5. rerankQuery 不变 + // cross-encoder 接收纯自然语言 query,不受 hints 干扰 + // 焦点消息始终在前,保证 reranker 内部截断时保留最关键内容 // 6. 增强 lexicalTerms if (hints.length > 0) { diff --git a/modules/story-summary/vector/retrieval/recall.js b/modules/story-summary/vector/retrieval/recall.js index 8f6d156..8096044 100644 --- a/modules/story-summary/vector/retrieval/recall.js +++ b/modules/story-summary/vector/retrieval/recall.js @@ -1,15 +1,22 @@ // ═══════════════════════════════════════════════════════════════════════════ -// Story Summary - Recall Engine (v7 - Two-Stage: L0 Locate → L1 Evidence) +// Story Summary - Recall Engine (v8 - Weighted Query Vectors + Floor Aggregation) // // 命名规范: // - 存储层用 L0/L1/L2/L3(StateAtom/Chunk/Event/Fact) // - 召回层用语义名称:anchor/evidence/event/constraint // +// v7 → v8 变更: +// - Query 取 3 条消息(对齐 L0 对结构),加权向量合成替代文本拼接 +// - R1 权重 [0.15, 0.30, 0.55](焦点 > 近上下文 > 远上下文) +// - R2 复用 R1 向量 + embed hints 1 条,权重 [0.10, 0.20, 0.45, 0.25] +// - Dense floor 聚合:max → maxSim×0.6 + meanSim×0.4 +// - Lexical floor 聚合:max → maxScore × (1 + 0.3×log₂(hitCount)) +// // 架构: // 阶段 1: Query Build(确定性,无 LLM) -// 阶段 2: Round 1 Dense Retrieval(L0 + L2) -// 阶段 3: Query Refinement(用已命中记忆增强) -// 阶段 4: Round 2 Dense Retrieval(L0 + L2) +// 阶段 2: Round 1 Dense Retrieval(batch embed 3 段 → 加权平均) +// 阶段 3: Query Refinement(用已命中记忆产出 hints 段) +// 阶段 4: Round 2 Dense Retrieval(复用 R1 vec + embed hints → 加权平均) // 阶段 5: Lexical Retrieval // 阶段 6: Floor W-RRF Fusion + Rerank + L1 配对 // 阶段 7: L1 配对组装(L0 → top-1 AI L1 + top-1 USER L1) @@ -21,7 +28,14 @@ import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js'; import { getEngineFingerprint, embed } from '../utils/embedder.js'; import { xbLog } from '../../../../core/debug-core.js'; import { getContext } from '../../../../../../../extensions.js'; -import { buildQueryBundle, refineQueryBundle } from './query-builder.js'; +import { + buildQueryBundle, + refineQueryBundle, + computeLengthFactor, + FOCUS_BASE_WEIGHT_R2, + CONTEXT_BASE_WEIGHTS_R2, + FOCUS_MIN_NORMALIZED_WEIGHT, +} from './query-builder.js'; import { getLexicalIndex, searchLexicalIndex } from './lexical-index.js'; import { rerankChunks } from '../llm/reranker.js'; import { createMetrics, calcSimilarityStats } from './metrics.js'; @@ -33,8 +47,9 @@ const MODULE_ID = 'recall'; // ═══════════════════════════════════════════════════════════════════════════ const CONFIG = { - // 窗口 - LAST_MESSAGES_K: 2, + // 窗口:取 3 条消息(对齐 L0 USER+AI 对结构) + LAST_MESSAGES_K: 3, + LAST_MESSAGES_K_WITH_PENDING: 2, // pending 存在时只取 2 条上下文,避免形成 4 段 // Anchor (L0 StateAtoms) ANCHOR_MIN_SIMILARITY: 0.58, @@ -51,6 +66,13 @@ const CONFIG = { RRF_W_LEX: 0.9, FUSION_CAP: 60, + // Dense floor 聚合权重 + DENSE_AGG_W_MAX: 0.6, + DENSE_AGG_W_MEAN: 0.4, + + // Lexical floor 聚合密度加成 + LEX_DENSITY_BONUS: 0.3, + // Rerank(floor-level) RERANK_TOP_N: 20, RERANK_MIN_SCORE: 0.15, @@ -66,9 +88,6 @@ const CONFIG = { /** * 计算余弦相似度 - * @param {number[]} a - * @param {number[]} b - * @returns {number} */ function cosineSimilarity(a, b) { if (!a?.length || !b?.length || a.length !== b.length) return 0; @@ -83,8 +102,6 @@ function cosineSimilarity(a, b) { /** * 标准化字符串 - * @param {string} s - * @returns {string} */ function normalize(s) { return String(s || '') @@ -96,12 +113,8 @@ function normalize(s) { /** * 获取最近消息 - * @param {object[]} chat - * @param {number} count - * @param {boolean} excludeLastAi - * @returns {object[]} */ -function getLastMessages(chat, count = 2, excludeLastAi = false) { +function getLastMessages(chat, count = 3, excludeLastAi = false) { if (!chat?.length) return []; let messages = [...chat]; if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) { @@ -111,18 +124,128 @@ function getLastMessages(chat, count = 2, excludeLastAi = false) { } // ═══════════════════════════════════════════════════════════════════════════ -// MMR 选择算法 +// 加权向量工具 // ═══════════════════════════════════════════════════════════════════════════ /** - * Maximal Marginal Relevance 选择 - * @param {object[]} candidates - * @param {number} k - * @param {number} lambda - * @param {Function} getVector - * @param {Function} getScore - * @returns {object[]} + * 多向量加权平均 + * + * @param {number[][]} vectors - 向量数组 + * @param {number[]} weights - 归一化后的权重(sum = 1) + * @returns {number[]|null} */ +function weightedAverageVectors(vectors, weights) { + if (!vectors?.length || !weights?.length || vectors.length !== weights.length) return null; + + const dims = vectors[0].length; + const result = new Array(dims).fill(0); + + for (let i = 0; i < vectors.length; i++) { + const w = weights[i]; + const v = vectors[i]; + if (!v?.length) continue; + for (let d = 0; d < dims; d++) { + result[d] += w * v[d]; + } + } + + return result; +} + +/** + * 对归一化权重做“目标位最小占比”硬保底 + * + * @param {number[]} weights - 已归一化权重(sum≈1) + * @param {number} targetIdx - 目标位置(focus 段索引) + * @param {number} minWeight - 最小占比(0~1) + * @returns {number[]} 调整后的归一化权重 + */ +function clampMinNormalizedWeight(weights, targetIdx, minWeight) { + if (!weights?.length) return []; + if (targetIdx < 0 || targetIdx >= weights.length) return weights; + + const current = weights[targetIdx]; + if (current >= minWeight) return weights; + + const otherSum = 1 - current; + if (otherSum <= 0) { + const out = new Array(weights.length).fill(0); + out[targetIdx] = 1; + return out; + } + + const remain = 1 - minWeight; + const scale = remain / otherSum; + + const out = weights.map((w, i) => (i === targetIdx ? minWeight : w * scale)); + // 数值稳定性:消除浮点误差 + const drift = 1 - out.reduce((a, b) => a + b, 0); + out[targetIdx] += drift; + return out; +} + +/** + * 计算 R1 段权重(baseWeight × lengthFactor,归一化) + * + * @param {object[]} segments - QuerySegment[] + * @returns {number[]} 归一化后的权重 + */ +function computeSegmentWeights(segments) { + if (!segments?.length) return []; + + const adjusted = segments.map(s => s.baseWeight * computeLengthFactor(s.charCount)); + const sum = adjusted.reduce((a, b) => a + b, 0); + const normalized = sum <= 0 + ? segments.map(() => 1 / segments.length) + : adjusted.map(w => w / sum); + + // focus 段始终在末尾 + const focusIdx = segments.length - 1; + return clampMinNormalizedWeight(normalized, focusIdx, FOCUS_MIN_NORMALIZED_WEIGHT); +} + +/** + * 计算 R2 权重(R1 段用 R2 基础权重 + hints 段,归一化) + * + * @param {object[]} segments - QuerySegment[](与 R1 相同的段) + * @param {object|null} hintsSegment - { text, baseWeight, charCount } + * @returns {number[]} 归一化后的权重(长度 = segments.length + (hints ? 1 : 0)) + */ +function computeR2Weights(segments, hintsSegment) { + if (!segments?.length) return []; + + // 为 R1 段分配 R2 基础权重(尾部对齐) + const contextCount = segments.length - 1; + const r2Base = []; + for (let i = 0; i < contextCount; i++) { + const weightIdx = Math.max(0, CONTEXT_BASE_WEIGHTS_R2.length - contextCount + i); + r2Base.push(CONTEXT_BASE_WEIGHTS_R2[weightIdx] || CONTEXT_BASE_WEIGHTS_R2[0]); + } + r2Base.push(FOCUS_BASE_WEIGHT_R2); + + // 应用 lengthFactor + const adjusted = r2Base.map((w, i) => w * computeLengthFactor(segments[i].charCount)); + + // 追加 hints + if (hintsSegment) { + adjusted.push(hintsSegment.baseWeight * computeLengthFactor(hintsSegment.charCount)); + } + + // 归一化 + const sum = adjusted.reduce((a, b) => a + b, 0); + const normalized = sum <= 0 + ? adjusted.map(() => 1 / adjusted.length) + : adjusted.map(w => w / sum); + + // R2 中 focus 位置固定为“segments 最后一个” + const focusIdx = segments.length - 1; + return clampMinNormalizedWeight(normalized, focusIdx, FOCUS_MIN_NORMALIZED_WEIGHT); +} + +// ═══════════════════════════════════════════════════════════════════════════ +// MMR 选择算法 +// ═══════════════════════════════════════════════════════════════════════════ + function mmrSelect(candidates, k, lambda, getVector, getScore) { const selected = []; const ids = new Set(); @@ -166,13 +289,6 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) { // [Anchors] L0 StateAtoms 检索 // ═══════════════════════════════════════════════════════════════════════════ -/** - * 检索语义锚点 - * @param {number[]} queryVector - * @param {object} vectorConfig - * @param {object|null} metrics - * @returns {Promise<{hits: object[], floors: Set}>} - */ async function recallAnchors(queryVector, vectorConfig, metrics) { const { chatId } = getContext(); if (!chatId || !queryVector?.length) { @@ -228,15 +344,6 @@ async function recallAnchors(queryVector, vectorConfig, metrics) { // [Events] L2 Events 检索 // ═══════════════════════════════════════════════════════════════════════════ -/** - * 检索事件 - * @param {number[]} queryVector - * @param {object[]} allEvents - * @param {object} vectorConfig - * @param {string[]} focusEntities - * @param {object|null} metrics - * @returns {Promise} - */ async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities, metrics) { const { chatId } = getContext(); if (!chatId || !queryVector?.length || !allEvents?.length) { @@ -344,11 +451,6 @@ async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities, // [Causation] 因果链追溯 // ═══════════════════════════════════════════════════════════════════════════ -/** - * 构建事件索引 - * @param {object[]} allEvents - * @returns {Map} - */ function buildEventIndex(allEvents) { const map = new Map(); for (const e of allEvents || []) { @@ -357,13 +459,6 @@ function buildEventIndex(allEvents) { return map; } -/** - * 追溯因果链 - * @param {object[]} eventHits - * @param {Map} eventIndex - * @param {number} maxDepth - * @returns {{results: object[], maxDepth: number}} - */ function traceCausation(eventHits, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) { const out = new Map(); const idRe = /^evt-\d+$/; @@ -411,23 +506,9 @@ function traceCausation(eventHits, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MA } // ═══════════════════════════════════════════════════════════════════════════ -// [W-RRF] 加权倒数排名融合(L0-only) +// [W-RRF] 加权倒数排名融合(floor 粒度) // ═══════════════════════════════════════════════════════════════════════════ -/** - * @typedef {object} RankedItem - * @property {string} id - 唯一标识符 - * @property {number} score - 该路的原始分数 - */ - -/** - * W-RRF 加权倒数排名融合(floor 粒度) - * - * @param {{id: number, score: number}[]} denseRank - Dense 路(floor → max cosine,降序) - * @param {{id: number, score: number}[]} lexRank - Lexical 路(floor → max bm25,降序) - * @param {number} cap - 输出上限 - * @returns {{top: {id: number, fusionScore: number}[], totalUnique: number}} - */ function fuseByFloor(denseRank, lexRank, cap = CONFIG.FUSION_CAP) { const k = CONFIG.RRF_K; const wD = CONFIG.RRF_W_DENSE; @@ -464,16 +545,6 @@ function fuseByFloor(denseRank, lexRank, cap = CONFIG.FUSION_CAP) { // [Stage 6] Floor 融合 + Rerank + L1 配对 // ═══════════════════════════════════════════════════════════════════════════ -/** - * Floor 粒度融合 + Rerank + L1 配对 - * - * @param {object[]} anchorHits - L0 dense 命中(Round 2) - * @param {number[]} queryVector - 查询向量(v1) - * @param {string} rerankQuery - rerank 查询文本(纯自然语言) - * @param {object} lexicalResult - 词法检索结果 - * @param {object} metrics - * @returns {Promise<{l0Selected: object[], l1ByFloor: Map}>} - */ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexicalResult, metrics) { const { chatId, chat, name1, name2 } = getContext(); if (!chatId) return { l0Selected: [], l1ByFloor: new Map() }; @@ -481,26 +552,36 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic const T_Start = performance.now(); // ───────────────────────────────────────────────────────────────── - // 6a. Dense floor rank(每个 floor 取 max cosine) + // 6a. Dense floor rank(加权聚合:maxSim×0.6 + meanSim×0.4) // ───────────────────────────────────────────────────────────────── - const denseFloorMap = new Map(); + const denseFloorAgg = new Map(); for (const a of (anchorHits || [])) { - const cur = denseFloorMap.get(a.floor) || 0; - if (a.similarity > cur) denseFloorMap.set(a.floor, a.similarity); + const cur = denseFloorAgg.get(a.floor); + if (!cur) { + denseFloorAgg.set(a.floor, { maxSim: a.similarity, hitCount: 1, sumSim: a.similarity }); + } else { + cur.maxSim = Math.max(cur.maxSim, a.similarity); + cur.hitCount++; + cur.sumSim += a.similarity; + } } - const denseFloorRank = [...denseFloorMap.entries()] - .sort((a, b) => b[1] - a[1]) - .map(([floor, score]) => ({ id: floor, score })); + const denseFloorRank = [...denseFloorAgg.entries()] + .map(([floor, info]) => ({ + id: floor, + score: info.maxSim * CONFIG.DENSE_AGG_W_MAX + + (info.sumSim / info.hitCount) * CONFIG.DENSE_AGG_W_MEAN, + })) + .sort((a, b) => b.score - a.score); // ───────────────────────────────────────────────────────────────── - // 6b. Lexical floor rank(chunkScores → floor 聚合 + USER→AI 映射 + 预过滤) + // 6b. Lexical floor rank(密度加成:maxScore × (1 + 0.3×log₂(hitCount))) // ───────────────────────────────────────────────────────────────── const atomFloorSet = new Set(getStateAtoms().map(a => a.floor)); - const lexFloorScores = new Map(); + const lexFloorAgg = new Map(); for (const { chunkId, score } of (lexicalResult?.chunkScores || [])) { const match = chunkId?.match(/^c-(\d+)-/); if (!match) continue; @@ -519,13 +600,21 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic // 预过滤:必须有 L0 atoms if (!atomFloorSet.has(floor)) continue; - const cur = lexFloorScores.get(floor) || 0; - if (score > cur) lexFloorScores.set(floor, score); + const cur = lexFloorAgg.get(floor); + if (!cur) { + lexFloorAgg.set(floor, { maxScore: score, hitCount: 1 }); + } else { + cur.maxScore = Math.max(cur.maxScore, score); + cur.hitCount++; + } } - const lexFloorRank = [...lexFloorScores.entries()] - .sort((a, b) => b[1] - a[1]) - .map(([floor, score]) => ({ id: floor, score })); + const lexFloorRank = [...lexFloorAgg.entries()] + .map(([floor, info]) => ({ + id: floor, + score: info.maxScore * (1 + CONFIG.LEX_DENSITY_BONUS * Math.log2(Math.max(1, info.hitCount))), + })) + .sort((a, b) => b.score - a.score); // ───────────────────────────────────────────────────────────────── // 6c. Floor W-RRF 融合 @@ -541,6 +630,8 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic metrics.fusion.totalUnique = totalUnique; metrics.fusion.afterCap = fusedFloors.length; metrics.fusion.time = fusionTime; + metrics.fusion.denseAggMethod = `max×${CONFIG.DENSE_AGG_W_MAX}+mean×${CONFIG.DENSE_AGG_W_MEAN}`; + metrics.fusion.lexDensityBonus = CONFIG.LEX_DENSITY_BONUS; metrics.evidence.floorCandidates = fusedFloors.length; } @@ -617,7 +708,7 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic } // ───────────────────────────────────────────────────────────────── - // 6f. 并发 Rerank + // 6f. Rerank // ───────────────────────────────────────────────────────────────── const T_Rerank_Start = performance.now(); @@ -647,7 +738,6 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic }; } - // document 平均长度 if (rerankCandidates.length > 0) { const totalLen = rerankCandidates.reduce((s, c) => s + (c.text?.length || 0), 0); metrics.evidence.rerankDocAvgLength = Math.round(totalLen / rerankCandidates.length); @@ -666,6 +756,13 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic atomsByFloor.get(atom.floor).push(atom); } + // 重建 denseFloorMap 以获取每层 max cosine(用于 L0 similarity 标注) + const denseFloorMaxMap = new Map(); + for (const a of (anchorHits || [])) { + const cur = denseFloorMaxMap.get(a.floor) || 0; + if (a.similarity > cur) denseFloorMaxMap.set(a.floor, a.similarity); + } + const l0Selected = []; const l1ByFloor = new Map(); let contextPairsAdded = 0; @@ -673,9 +770,9 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic for (const item of reranked) { const floor = item.floor; const rerankScore = item._rerankScore || 0; - const denseSim = denseFloorMap.get(floor) || 0; + const denseSim = denseFloorMaxMap.get(floor) || 0; - // 收集该 floor 所有 L0 atoms,共享 floor 的 rerankScore + // 收集该 floor 所有 L0 atoms const floorAtoms = atomsByFloor.get(floor) || []; for (const atom of floorAtoms) { l0Selected.push({ @@ -735,22 +832,14 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic return { l0Selected, l1ByFloor }; } -// [L1] 拉取 + Cosine 打分(并发子任务) + +// ═══════════════════════════════════════════════════════════════════════════ +// [L1] 拉取 + Cosine 打分 // ═══════════════════════════════════════════════════════════════════════════ -/** - * 从 IndexedDB 拉取指定楼层的 L1 chunks + 向量,用 queryVector cosine 打分 - * - * @param {string} chatId - * @param {number[]} floors - 需要拉取的楼层列表 - * @param {number[]} queryVector - 查询向量(v1) - * @param {object[]} chat - 聊天消息数组 - * @returns {Promise>} floor → scored chunks(带 _cosineScore) - */ async function pullAndScoreL1(chatId, floors, queryVector, chat) { const T0 = performance.now(); - /** @type {Map} */ const result = new Map(); if (!chatId || !floors?.length || !queryVector?.length) { @@ -758,7 +847,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) { return result; } - // 拉取 chunks let dbChunks = []; try { dbChunks = await getChunksByFloors(chatId, floors); @@ -773,7 +861,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) { return result; } - // 拉取向量 const chunkIds = dbChunks.map(c => c.chunkId); let chunkVectors = []; try { @@ -786,7 +873,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) { const vectorMap = new Map(chunkVectors.map(v => [v.chunkId, v.vector])); - // Cosine 打分 + 按楼层分组 for (const chunk of dbChunks) { const vec = vectorMap.get(chunk.chunkId); const cosineScore = vec?.length ? cosineSimilarity(queryVector, vec) : 0; @@ -807,7 +893,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) { result.get(chunk.floor).push(scored); } - // 每楼层按 cosine 降序排序 for (const [, chunks] of result) { chunks.sort((a, b) => b._cosineScore - a._cosineScore); } @@ -825,16 +910,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) { // 主函数 // ═══════════════════════════════════════════════════════════════════════════ -/** - * 执行记忆召回 - * - * @param {object[]} allEvents - 所有事件(L2) - * @param {object} vectorConfig - 向量配置 - * @param {object} options - * @param {boolean} options.excludeLastAi - * @param {string|null} options.pendingUserMessage - * @returns {Promise} - */ export async function recallMemory(allEvents, vectorConfig, options = {}) { const T0 = performance.now(); const { chat } = getContext(); @@ -865,7 +940,10 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) { const T_Build_Start = performance.now(); - const lastMessages = getLastMessages(chat, CONFIG.LAST_MESSAGES_K, excludeLastAi); + const lastMessagesCount = pendingUserMessage + ? CONFIG.LAST_MESSAGES_K_WITH_PENDING + : CONFIG.LAST_MESSAGES_K; + const lastMessages = getLastMessages(chat, lastMessagesCount, excludeLastAi); const bundle = buildQueryBundle(lastMessages, pendingUserMessage); @@ -873,29 +951,39 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) { metrics.anchor.focusEntities = bundle.focusEntities; if (metrics.query?.lengths) { - metrics.query.lengths.v0Chars = String(bundle.queryText_v0 || '').length; + metrics.query.lengths.v0Chars = bundle.querySegments.reduce((sum, s) => sum + s.text.length, 0); metrics.query.lengths.v1Chars = null; - metrics.query.lengths.rerankChars = String(bundle.rerankQuery || bundle.queryText_v0 || '').length; + metrics.query.lengths.rerankChars = String(bundle.rerankQuery || '').length; } xbLog.info(MODULE_ID, - `Query Build: focus=[${bundle.focusEntities.join(',')}] lexTerms=[${bundle.lexicalTerms.slice(0, 5).join(',')}]` + `Query Build: focus=[${bundle.focusEntities.join(',')}] segments=${bundle.querySegments.length} lexTerms=[${bundle.lexicalTerms.slice(0, 5).join(',')}]` ); // ═══════════════════════════════════════════════════════════════════ - // 阶段 2: Round 1 Dense Retrieval + // 阶段 2: Round 1 Dense Retrieval(batch embed → 加权平均) // ═══════════════════════════════════════════════════════════════════ - let queryVector_v0; + const segmentTexts = bundle.querySegments.map(s => s.text); + if (!segmentTexts.length) { + metrics.timing.total = Math.round(performance.now() - T0); + return { + events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [], + focusEntities: bundle.focusEntities, + elapsed: metrics.timing.total, + logText: 'No query segments.', + metrics, + }; + } + + let r1Vectors; try { - const [vec] = await embed([bundle.queryText_v0], vectorConfig, { timeout: 10000 }); - queryVector_v0 = vec; + r1Vectors = await embed(segmentTexts, vectorConfig, { timeout: 10000 }); } catch (e1) { xbLog.warn(MODULE_ID, 'Round 1 向量化失败,500ms 后重试', e1); await new Promise(r => setTimeout(r, 500)); try { - const [vec] = await embed([bundle.queryText_v0], vectorConfig, { timeout: 15000 }); - queryVector_v0 = vec; + r1Vectors = await embed(segmentTexts, vectorConfig, { timeout: 15000 }); } catch (e2) { xbLog.error(MODULE_ID, 'Round 1 向量化重试仍失败', e2); metrics.timing.total = Math.round(performance.now() - T0); @@ -909,13 +997,31 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) { } } + if (!r1Vectors?.length || r1Vectors.some(v => !v?.length)) { + metrics.timing.total = Math.round(performance.now() - T0); + return { + events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [], + focusEntities: bundle.focusEntities, + elapsed: metrics.timing.total, + logText: 'Empty query vectors (round 1).', + metrics, + }; + } + + const r1Weights = computeSegmentWeights(bundle.querySegments); + const queryVector_v0 = weightedAverageVectors(r1Vectors, r1Weights); + + if (metrics) { + metrics.query.segmentWeights = r1Weights.map(w => Number(w.toFixed(3))); + } + if (!queryVector_v0?.length) { metrics.timing.total = Math.round(performance.now() - T0); return { events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [], focusEntities: bundle.focusEntities, elapsed: metrics.timing.total, - logText: 'Empty query vector (round 1).', + logText: 'Weighted average produced empty vector.', metrics, }; } @@ -929,7 +1035,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) { const r1EventTime = Math.round(performance.now() - T_R1_Event_Start); xbLog.info(MODULE_ID, - `Round 1: anchors=${anchorHits_v0.length} events=${eventHits_v0.length} (anchor=${r1AnchorTime}ms event=${r1EventTime}ms)` + `Round 1: anchors=${anchorHits_v0.length} events=${eventHits_v0.length} weights=[${r1Weights.map(w => w.toFixed(2)).join(',')}] (anchor=${r1AnchorTime}ms event=${r1EventTime}ms)` ); // ═══════════════════════════════════════════════════════════════════ @@ -943,27 +1049,44 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) { metrics.query.refineTime = Math.round(performance.now() - T_Refine_Start); metrics.anchor.focusEntities = bundle.focusEntities; - if (metrics.query?.lengths) { - metrics.query.lengths.v1Chars = bundle.queryText_v1 == null ? null : String(bundle.queryText_v1).length; - metrics.query.lengths.rerankChars = String(bundle.rerankQuery || bundle.queryText_v1 || bundle.queryText_v0 || '').length; + // 更新 v1 长度指标 + if (metrics.query?.lengths && bundle.hintsSegment) { + metrics.query.lengths.v1Chars = metrics.query.lengths.v0Chars + bundle.hintsSegment.text.length; } xbLog.info(MODULE_ID, - `Refinement: focus=[${bundle.focusEntities.join(',')}] hasV1=${!!bundle.queryText_v1} (${metrics.query.refineTime}ms)` + `Refinement: focus=[${bundle.focusEntities.join(',')}] hasHints=${!!bundle.hintsSegment} (${metrics.query.refineTime}ms)` ); // ═══════════════════════════════════════════════════════════════════ - // 阶段 4: Round 2 Dense Retrieval + // 阶段 4: Round 2 Dense Retrieval(复用 R1 向量 + embed hints) // ═══════════════════════════════════════════════════════════════════ - const queryTextFinal = bundle.queryText_v1 || bundle.queryText_v0; - let queryVector_v1; - try { - const [vec] = await embed([queryTextFinal], vectorConfig, { timeout: 10000 }); - queryVector_v1 = vec; - } catch (e) { - xbLog.warn(MODULE_ID, 'Round 2 向量化失败,降级使用 Round 1 向量', e); + + if (bundle.hintsSegment) { + try { + const [hintsVec] = await embed([bundle.hintsSegment.text], vectorConfig, { timeout: 10000 }); + + if (hintsVec?.length) { + const r2Weights = computeR2Weights(bundle.querySegments, bundle.hintsSegment); + queryVector_v1 = weightedAverageVectors([...r1Vectors, hintsVec], r2Weights); + + if (metrics) { + metrics.query.r2Weights = r2Weights.map(w => Number(w.toFixed(3))); + } + + xbLog.info(MODULE_ID, + `Round 2 weights: [${r2Weights.map(w => w.toFixed(2)).join(',')}]` + ); + } else { + queryVector_v1 = queryVector_v0; + } + } catch (e) { + xbLog.warn(MODULE_ID, 'Round 2 hints 向量化失败,降级使用 Round 1 向量', e); + queryVector_v1 = queryVector_v0; + } + } else { queryVector_v1 = queryVector_v0; } @@ -1082,13 +1205,14 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) { metrics.event.entityNames = bundle.focusEntities; metrics.event.entitiesUsed = bundle.focusEntities.length; - console.group('%c[Recall v7]', 'color: #7c3aed; font-weight: bold'); + console.group('%c[Recall v8]', 'color: #7c3aed; font-weight: bold'); console.log(`Total: ${metrics.timing.total}ms`); console.log(`Query Build: ${metrics.query.buildTime}ms | Refine: ${metrics.query.refineTime}ms`); + console.log(`R1 weights: [${r1Weights.map(w => w.toFixed(2)).join(', ')}]`); console.log(`Focus: [${bundle.focusEntities.join(', ')}]`); console.log(`Round 2 Anchors: ${anchorHits.length} hits → ${anchorFloors_dense.size} floors`); console.log(`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length}`); - console.log(`Fusion (floor): dense=${metrics.fusion.denseFloors} lex=${metrics.fusion.lexFloors} → cap=${metrics.fusion.afterCap} (${metrics.fusion.time}ms)`); + console.log(`Fusion (floor, weighted): dense=${metrics.fusion.denseFloors} lex=${metrics.fusion.lexFloors} → cap=${metrics.fusion.afterCap} (${metrics.fusion.time}ms)`); console.log(`Floor Rerank: ${metrics.evidence.beforeRerank || 0} → ${metrics.evidence.floorsSelected || 0} floors → L0=${metrics.evidence.l0Collected || 0} (${metrics.evidence.rerankTime || 0}ms)`); console.log(`L1: ${metrics.evidence.l1Pulled || 0} pulled → ${metrics.evidence.l1Attached || 0} attached (${metrics.evidence.l1CosineTime || 0}ms)`); console.log(`Events: ${eventHits.length} hits, ${causalChain.length} causal`);