feat(recall): clamp focus weight and adjust pending context window

This commit is contained in:
2026-02-11 17:21:04 +08:00
parent 297cc03770
commit 816196a710
3 changed files with 544 additions and 327 deletions

View File

@@ -1,16 +1,12 @@
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Metrics Collector (v4 - Two-Stage: L0 Locate → L1 Evidence) // Story Summary - Metrics Collector (v5 - Weighted Query + Floor Aggregation)
// //
// 命名规范 // v4 → v5 变更
// - 存储层用 L0/L1/L2/L3StateAtom/Chunk/Event/Fact // - query: 新增 segmentWeights / r2Weights加权向量诊断
// - 指标层用语义名称anchor/evidence/event/constraint/arc // - fusion: 新增 denseAggMethod / lexDensityBonus聚合策略可观测
// // - quality: 新增 rerankRetentionRate粗排-精排一致性)
// 架构变更v3 → v4 // - 移除 timing 中从未写入的死字段queryBuild/queryRefine/lexicalSearch/fusion
// - evidence 区块反映 L0-only 融合 + L1 按楼层拉取的两阶段架构 // - 移除从未写入的 arc 区块
// - 删除 mergedByType / selectedByType不再有混合池
// - 新增 floorCandidates / floorsSelected / l0Collected / l1Pulled / l1Attached / l1CosineTime
// - fusion 区块明确标注 L0-only删除 anchorCount
// - quality.chunkRealRatio → quality.l1AttachRate
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
/** /**
@@ -25,9 +21,11 @@ export function createMetrics() {
refineTime: 0, refineTime: 0,
lengths: { lengths: {
v0Chars: 0, v0Chars: 0,
v1Chars: null, // null = NA v1Chars: null, // null = 无 hints
rerankChars: 0, rerankChars: 0,
}, },
segmentWeights: [], // R1 归一化后权重 [context..., focus]
r2Weights: null, // R2 归一化后权重 [context..., focus, hints]null = 无 hints
}, },
// Anchor (L0 StateAtoms) - 语义锚点 // Anchor (L0 StateAtoms) - 语义锚点
@@ -55,6 +53,8 @@ export function createMetrics() {
totalUnique: 0, totalUnique: 0,
afterCap: 0, afterCap: 0,
time: 0, time: 0,
denseAggMethod: '', // 聚合方法描述(如 "max×0.6+mean×0.4"
lexDensityBonus: 0, // 密度加成系数
}, },
// Constraint (L3 Facts) - 世界约束 // Constraint (L3 Facts) - 世界约束
@@ -83,34 +83,28 @@ export function createMetrics() {
// Evidence (Two-Stage: Floor rerank → L1 pull) - 原文证据 // Evidence (Two-Stage: Floor rerank → L1 pull) - 原文证据
evidence: { evidence: {
// Stage 1: Floor // Stage 1: Floor
floorCandidates: 0, // W-RRF 融合后的 floor 候选数 floorCandidates: 0,
floorsSelected: 0, // rerank 后选中的 floor 数 floorsSelected: 0,
l0Collected: 0, // 选中 floor 中收集的 L0 atom 总数 l0Collected: 0,
rerankApplied: false, rerankApplied: false,
rerankFailed: false, rerankFailed: false,
beforeRerank: 0, beforeRerank: 0,
afterRerank: 0, afterRerank: 0,
rerankTime: 0, rerankTime: 0,
rerankScores: null, rerankScores: null,
rerankDocAvgLength: 0, // rerank document 平均字符数 rerankDocAvgLength: 0,
// Stage 2: L1 // Stage 2: L1
l1Pulled: 0, // 从 DB 拉取的 L1 chunk 总数 l1Pulled: 0,
l1Attached: 0, // 实际挂载的 L1 数top-1 × floor × 2侧 l1Attached: 0,
l1CosineTime: 0, // L1 cosine 打分耗时 l1CosineTime: 0,
// 装配 // 装配
contextPairsAdded: 0, // USER 侧挂载数量 contextPairsAdded: 0,
tokens: 0, tokens: 0,
assemblyTime: 0, assemblyTime: 0,
}, },
// Arc - 人物弧光
arc: {
injected: 0,
tokens: 0,
},
// Formatting - 格式化 // Formatting - 格式化
formatting: { formatting: {
sectionsIncluded: [], sectionsIncluded: [],
@@ -131,13 +125,9 @@ export function createMetrics() {
}, },
}, },
// Timing - 计时 // Timing - 计时(仅包含实际写入的字段)
timing: { timing: {
queryBuild: 0,
queryRefine: 0,
anchorSearch: 0, anchorSearch: 0,
lexicalSearch: 0,
fusion: 0,
constraintFilter: 0, constraintFilter: 0,
eventRetrieval: 0, eventRetrieval: 0,
evidenceRetrieval: 0, evidenceRetrieval: 0,
@@ -151,7 +141,8 @@ export function createMetrics() {
quality: { quality: {
constraintCoverage: 100, constraintCoverage: 100,
eventPrecisionProxy: 0, eventPrecisionProxy: 0,
l1AttachRate: 0, // 有 L1 挂载的 floor 占比 l1AttachRate: 0,
rerankRetentionRate: 0,
potentialIssues: [], potentialIssues: [],
}, },
}; };
@@ -178,6 +169,16 @@ export function calcSimilarityStats(similarities) {
}; };
} }
/**
 * Render a weight array as a compact string for the metrics log.
 *
 * @param {number[]|null} weights - Weights to render; null, undefined or an empty array yields 'N/A'.
 * @returns {string} Bracketed, comma-separated list. Numeric entries are printed
 *   with three decimals; any other entry is passed through String().
 */
function fmtWeights(weights) {
    const isEmpty = !weights?.length;
    if (isEmpty) {
        return 'N/A';
    }

    const renderCell = (value) => {
        if (typeof value === 'number') {
            return value.toFixed(3);
        }
        return String(value);
    };

    const cells = weights.map(renderCell);
    return `[${cells.join(', ')}]`;
}
/** /**
* 格式化指标为可读日志 * 格式化指标为可读日志
* @param {object} metrics * @param {object} metrics
@@ -189,21 +190,27 @@ export function formatMetricsLog(metrics) {
lines.push(''); lines.push('');
lines.push('════════════════════════════════════════'); lines.push('════════════════════════════════════════');
lines.push(' Recall Metrics Report (v4) '); lines.push(' Recall Metrics Report (v5) ');
lines.push('════════════════════════════════════════'); lines.push('════════════════════════════════════════');
lines.push(''); lines.push('');
// Query Length // Query Length
lines.push('[Query Length] 查询长度'); lines.push('[Query Length] 查询长度');
lines.push(`├─ query_v0_chars: ${m.query?.lengths?.v0Chars ?? 0}`); lines.push(`├─ query_v0_chars: ${m.query?.lengths?.v0Chars ?? 0}`);
lines.push(`├─ query_v1_chars: ${m.query?.lengths?.v1Chars == null ? 'NA' : m.query.lengths.v1Chars}`); lines.push(`├─ query_v1_chars: ${m.query?.lengths?.v1Chars == null ? 'N/A' : m.query.lengths.v1Chars}`);
lines.push(`└─ rerank_query_chars: ${m.query?.lengths?.rerankChars ?? 0}`); lines.push(`└─ rerank_query_chars: ${m.query?.lengths?.rerankChars ?? 0}`);
lines.push(''); lines.push('');
// Query Build // Query Build
lines.push('[Query] 查询构建'); lines.push('[Query] 查询构建');
lines.push(`├─ build_time: ${m.query.buildTime}ms`); lines.push(`├─ build_time: ${m.query.buildTime}ms`);
lines.push(`─ refine_time: ${m.query.refineTime}ms`); lines.push(`─ refine_time: ${m.query.refineTime}ms`);
lines.push(`├─ r1_weights: ${fmtWeights(m.query.segmentWeights)}`);
if (m.query.r2Weights) {
lines.push(`└─ r2_weights: ${fmtWeights(m.query.r2Weights)}`);
} else {
lines.push(`└─ r2_weights: N/A (no hints)`);
}
lines.push(''); lines.push('');
// Anchor (L0 StateAtoms) // Anchor (L0 StateAtoms)
@@ -228,7 +235,13 @@ export function formatMetricsLog(metrics) {
// Fusion (W-RRF, floor-level) // Fusion (W-RRF, floor-level)
lines.push('[Fusion] W-RRF (floor-level) - 多路融合'); lines.push('[Fusion] W-RRF (floor-level) - 多路融合');
lines.push(`├─ dense_floors: ${m.fusion.denseFloors}`); lines.push(`├─ dense_floors: ${m.fusion.denseFloors}`);
if (m.fusion.denseAggMethod) {
lines.push(`│ └─ aggregation: ${m.fusion.denseAggMethod}`);
}
lines.push(`├─ lex_floors: ${m.fusion.lexFloors}`); lines.push(`├─ lex_floors: ${m.fusion.lexFloors}`);
if (m.fusion.lexDensityBonus > 0) {
lines.push(`│ └─ density_bonus: ${m.fusion.lexDensityBonus}`);
}
lines.push(`├─ total_unique: ${m.fusion.totalUnique}`); lines.push(`├─ total_unique: ${m.fusion.totalUnique}`);
lines.push(`├─ after_cap: ${m.fusion.afterCap}`); lines.push(`├─ after_cap: ${m.fusion.afterCap}`);
lines.push(`└─ time: ${m.fusion.time}ms`); lines.push(`└─ time: ${m.fusion.time}ms`);
@@ -313,14 +326,6 @@ export function formatMetricsLog(metrics) {
lines.push(`└─ assembly_time: ${m.evidence.assemblyTime}ms`); lines.push(`└─ assembly_time: ${m.evidence.assemblyTime}ms`);
lines.push(''); lines.push('');
// Arc
if (m.arc.injected > 0) {
lines.push('[Arc] 人物弧光');
lines.push(`├─ injected: ${m.arc.injected}`);
lines.push(`└─ tokens: ${m.arc.tokens}`);
lines.push('');
}
// Formatting // Formatting
lines.push('[Formatting] 格式化'); lines.push('[Formatting] 格式化');
lines.push(`├─ sections: [${(m.formatting.sectionsIncluded || []).join(', ')}]`); lines.push(`├─ sections: [${(m.formatting.sectionsIncluded || []).join(', ')}]`);
@@ -363,6 +368,7 @@ export function formatMetricsLog(metrics) {
lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`); lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`);
lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`); lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`);
lines.push(`├─ l1_attach_rate: ${m.quality.l1AttachRate}%`); lines.push(`├─ l1_attach_rate: ${m.quality.l1AttachRate}%`);
lines.push(`├─ rerank_retention_rate: ${m.quality.rerankRetentionRate}%`);
if (m.quality.potentialIssues && m.quality.potentialIssues.length > 0) { if (m.quality.potentialIssues && m.quality.potentialIssues.length > 0) {
lines.push(`└─ potential_issues:`); lines.push(`└─ potential_issues:`);
@@ -398,6 +404,19 @@ export function detectIssues(metrics) {
issues.push('No focus entities extracted - entity lexicon may be empty or messages too short'); issues.push('No focus entities extracted - entity lexicon may be empty or messages too short');
} }
// 权重极端退化检测
const segWeights = m.query.segmentWeights || [];
if (segWeights.length > 0) {
const focusWeight = segWeights[segWeights.length - 1] || 0;
if (focusWeight < 0.15) {
issues.push(`Focus segment weight very low (${(focusWeight * 100).toFixed(0)}%) - focus message may be too short`);
}
const allLow = segWeights.every(w => w < 0.1);
if (allLow) {
issues.push('All segment weights below 10% - all messages may be extremely short');
}
}
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────
// 锚点匹配问题 // 锚点匹配问题
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────
@@ -494,6 +513,16 @@ export function detectIssues(metrics) {
} }
} }
// Rerank 保留率
const retentionRate = m.evidence.floorCandidates > 0
? Math.round(m.evidence.floorsSelected / m.evidence.floorCandidates * 100)
: 0;
m.quality.rerankRetentionRate = retentionRate;
if (m.evidence.floorCandidates > 0 && retentionRate < 25) {
issues.push(`Low rerank retention rate (${retentionRate}%) - fusion ranking poorly aligned with reranker`);
}
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────
// L1 挂载问题 // L1 挂载问题
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────

View File

@@ -2,8 +2,18 @@
// query-builder.js - 确定性查询构建器(无 LLM // query-builder.js - 确定性查询构建器(无 LLM
// //
// 职责: // 职责:
// 1. 从最近消息 + 实体词典构建 QueryBundle_v0 // 1. 从最近 3 条消息构建 QueryBundle(加权向量段)
// 2. 用第一轮召回结果增强为 QueryBundle_v1 // 2. 用第一轮召回结果产出 hints 段用于 R2 增强
//
// 加权向量设计:
// - 每条消息独立 embed,得到独立向量
// - 按位置分配基础权重(焦点 > 近上下文 > 远上下文)
// - 短消息通过 lengthFactor 自动降权(下限 35%)
// - recall.js 负责 embed + 归一化 + 加权平均
//
// 焦点确定:
// - pendingUserMessage 存在 → 它是焦点
// - 否则 → lastMessages 最后一条是焦点
// //
// 不负责向量化、检索、rerank // 不负责向量化、检索、rerank
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
@@ -15,12 +25,30 @@ import { filterText } from '../utils/text-filter.js';
import { tokenizeForIndex as tokenizerTokenizeForIndex } from '../utils/tokenizer.js'; import { tokenizeForIndex as tokenizerTokenizeForIndex } from '../utils/tokenizer.js';
// ───────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────
// 常量 // 权重常量
// ─────────────────────────────────────────────────────────────────────────
// R1 base weights, laid out as [...context(oldest→newest), focus].
// Focus message 55%, most recent context 30%, earlier context 15%.
export const FOCUS_BASE_WEIGHT = 0.55;
export const CONTEXT_BASE_WEIGHTS = [0.15, 0.30];
// R2 base weights: the focus gives up part of its share to the hints segment.
export const FOCUS_BASE_WEIGHT_R2 = 0.45;
export const CONTEXT_BASE_WEIGHTS_R2 = [0.10, 0.20];
export const HINTS_BASE_WEIGHT = 0.25;
// Length penalty: linear decay below 50 characters, with a floor of 35%.
export const LENGTH_FULL_THRESHOLD = 50;
export const LENGTH_MIN_FACTOR = 0.35;
// Minimum share of the focus segment after normalization (recall.js enforces it as a hard floor once weights are normalized).
// Meaning: even a very short focus text must not be diluted to a too-low weight.
export const FOCUS_MIN_NORMALIZED_WEIGHT = 0.35;
// ─────────────────────────────────────────────────────────────────────────
// 其他常量
// ───────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────
// Zero-darkbox policy:
// - No internal truncation. We rely on model-side truncation / provider limits.
// - If provider rejects due to length, we fail loudly and degrade explicitly.
const MEMORY_HINT_ATOMS_MAX = 5; const MEMORY_HINT_ATOMS_MAX = 5;
const MEMORY_HINT_EVENTS_MAX = 3; const MEMORY_HINT_EVENTS_MAX = 3;
const LEXICAL_TERMS_MAX = 10; const LEXICAL_TERMS_MAX = 10;
@@ -41,14 +69,6 @@ function cleanMessageText(text) {
.trim(); .trim();
} }
/**
* 截断文本到指定长度
* @param {string} text
* @param {number} maxLen
* @returns {string}
*/
// truncate removed by design (zero-darkbox)
/** /**
* 清理事件摘要(移除楼层标记) * 清理事件摘要(移除楼层标记)
* @param {string} summary * @param {string} summary
@@ -61,9 +81,23 @@ function cleanSummary(summary) {
} }
/** /**
* 从文本中提取高频实词(用于词法检索) * 计算长度因子
* *
* 使用统一分词器(结巴 + 实体保护 + 停用词过滤),按频率排序 * charCount >= 50 → 1.0
* charCount = 0 → 0.35
* 中间线性插值
*
* @param {number} charCount - 清洗后内容字符数(不含 speaker 前缀)
* @returns {number} 0.35 ~ 1.0
*/
export function computeLengthFactor(charCount) {
    // Content at or above the threshold is not penalized at all.
    const reachesFullLength = charCount >= LENGTH_FULL_THRESHOLD;
    if (reachesFullLength) {
        return 1.0;
    }

    // Empty (or negative) counts sit on the floor factor.
    if (charCount <= 0) {
        return LENGTH_MIN_FACTOR;
    }

    // In between: interpolate linearly from the floor up to 1.0.
    const headroom = 1.0 - LENGTH_MIN_FACTOR;
    const fill = charCount / LENGTH_FULL_THRESHOLD;
    return LENGTH_MIN_FACTOR + headroom * fill;
}
/**
* 从文本中提取高频实词(用于词法检索)
* *
* @param {string} text - 清洗后的文本 * @param {string} text - 清洗后的文本
* @param {number} maxTerms - 最大词数 * @param {number} maxTerms - 最大词数
@@ -72,10 +106,7 @@ function cleanSummary(summary) {
function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) { function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
if (!text) return []; if (!text) return [];
// 使用统一分词器(索引用,不去重,保留词频)
const tokens = tokenizerTokenizeForIndex(text); const tokens = tokenizerTokenizeForIndex(text);
// 统计词频
const freq = new Map(); const freq = new Map();
for (const token of tokens) { for (const token of tokens) {
const key = String(token || '').toLowerCase(); const key = String(token || '').toLowerCase();
@@ -89,172 +120,203 @@ function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
.map(([term]) => term); .map(([term]) => term);
} }
// ─────────────────────────────────────────────────────────────────────────
// 类型定义
// ─────────────────────────────────────────────────────────────────────────
/** /**
* 构建 rerank 专用查询(纯自然语言,不带结构标签) * @typedef {object} QuerySegment
* * @property {string} text - 待 embed 的文本(含 speaker 前缀,纯自然语言)
* rerankerbge-reranker-v2-m3的 query 应为自然语言文本, * @property {number} baseWeight - R1 基础权重
* 不含 [ENTITIES] [DIALOGUE] 等结构标签。 * @property {number} charCount - 内容字符数(不含 speaker 前缀,用于 lengthFactor
*
* @param {string[]} focusEntities - 焦点实体
* @param {object[]} lastMessages - 最近 K 条消息
* @param {string|null} pendingUserMessage - 待发送的用户消息
* @param {object} context - { name1, name2 }
* @returns {string}
*/ */
function buildRerankQuery(focusEntities, lastMessages, pendingUserMessage, context) {
const parts = [];
// 实体提示
if (focusEntities.length > 0) {
parts.push(`关于${focusEntities.join('、')}`);
}
// 最近对话原文
for (const m of (lastMessages || [])) {
const speaker = m.is_user ? (context.name1 || '用户') : (m.name || context.name2 || '角色');
const clean = cleanMessageText(m.mes || '');
if (clean) {
parts.push(`${speaker}${clean}`);
}
}
// 待发送消息
if (pendingUserMessage) {
const clean = cleanMessageText(pendingUserMessage);
if (clean) {
parts.push(`${context.name1 || '用户'}${clean}`);
}
}
return parts.join('\n');
}
// ─────────────────────────────────────────────────────────────────────────
// QueryBundle 类型定义JSDoc
// ─────────────────────────────────────────────────────────────────────────
/** /**
* @typedef {object} QueryBundle * @typedef {object} QueryBundle
* @property {string[]} focusEntities - 焦点实体(原词形,已排除 name1 * @property {QuerySegment[]} querySegments - R1 向量段(上下文 oldest→newest焦点在末尾
* @property {string} queryText_v0 - 第一轮查询文本 * @property {QuerySegment|null} hintsSegment - R2 hints 段refinement 后填充)
* @property {string|null} queryText_v1 - 第二轮查询文本refinement 后填充 * @property {string} rerankQuery - rerank 用的纯自然语言查询(焦点在前
* @property {string} rerankQuery - rerank 用的短查询
* @property {string[]} lexicalTerms - MiniSearch 查询词 * @property {string[]} lexicalTerms - MiniSearch 查询词
* @property {string[]} focusEntities - 焦点实体(原词形,已排除 name1
* @property {Set<string>} _lexicon - 实体词典(内部使用) * @property {Set<string>} _lexicon - 实体词典(内部使用)
* @property {Map<string, string>} _displayMap - 标准化→原词形映射(内部使用) * @property {Map<string, string>} _displayMap - 标准化→原词形映射(内部使用)
*/ */
// ───────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────
// 阶段 1构建 QueryBundle_v0 // 内部:消息条目构建
// ─────────────────────────────────────────────────────────────────────────
/**
* @typedef {object} MessageEntry
* @property {string} text - speaker内容完整文本
* @property {number} charCount - 内容字符数(不含 speaker 前缀)
*/
/**
 * Clean a chat message and wrap it as a MessageEntry.
 *
 * @param {object} message - Chat message object; `mes`, `is_user` and `name` are read.
 * @param {object} context - { name1, name2 }
 * @returns {MessageEntry|null} null when the message has no `mes`, or when
 *   nothing is left after cleaning.
 */
function buildMessageEntry(message, context) {
    const hasBody = Boolean(message?.mes);
    if (!hasBody) {
        return null;
    }

    // Resolve the speaker label: user messages use name1, everything else
    // prefers the message's own name and falls back to name2.
    let speaker;
    if (message.is_user) {
        speaker = context.name1 || '用户';
    } else {
        speaker = message.name || context.name2 || '角色';
    }

    const cleaned = cleanMessageText(message.mes);
    if (!cleaned) {
        return null;
    }

    // charCount deliberately covers the cleaned content only, not the speaker label.
    // NOTE(review): speaker and content are joined with no separator here — confirm this is intended.
    const entry = {
        text: `${speaker}${cleaned}`,
        charCount: cleaned.length,
    };
    return entry;
}
// ─────────────────────────────────────────────────────────────────────────
// 阶段 1构建 QueryBundle
// ───────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────
/** /**
* 构建初始查询包 * 构建初始查询包
* *
* @param {object[]} lastMessages - 最近 K=2 条消息 * 消息布局K=3 时):
* msg[0] = USER(#N-2) 上下文 baseWeight = 0.15
* msg[1] = AI(#N-1) 上下文 baseWeight = 0.30
* msg[2] = USER(#N) 焦点 baseWeight = 0.55
*
* 焦点确定:
* pendingUserMessage 存在 → 焦点,所有 lastMessages 为上下文
* pendingUserMessage 不存在 → lastMessages[-1] 为焦点,其余为上下文
*
* @param {object[]} lastMessages - 最近 K 条消息(由 recall.js 传入)
* @param {string|null} pendingUserMessage - 用户刚输入但未进 chat 的消息 * @param {string|null} pendingUserMessage - 用户刚输入但未进 chat 的消息
* @param {object|null} store - getSummaryStore() 返回值(可选,内部会自动获取) * @param {object|null} store
* @param {object|null} context - { name1, name2 }(可选,内部会自动获取) * @param {object|null} context - { name1, name2 }
* @returns {QueryBundle} * @returns {QueryBundle}
*/ */
export function buildQueryBundle(lastMessages, pendingUserMessage, store = null, context = null) { export function buildQueryBundle(lastMessages, pendingUserMessage, store = null, context = null) {
// 自动获取 store 和 context
if (!store) store = getSummaryStore(); if (!store) store = getSummaryStore();
if (!context) { if (!context) {
const ctx = getContext(); const ctx = getContext();
context = { name1: ctx.name1, name2: ctx.name2 }; context = { name1: ctx.name1, name2: ctx.name2 };
} }
// 1. 构建实体词典 // 1. 实体词典
const lexicon = buildEntityLexicon(store, context); const lexicon = buildEntityLexicon(store, context);
const displayMap = buildDisplayNameMap(store, context); const displayMap = buildDisplayNameMap(store, context);
// 2. 清洗消息文本 // 2. 分离焦点与上下文
const dialogueLines = []; const contextEntries = [];
const allCleanText = []; let focusEntry = null;
const allCleanTexts = [];
if (pendingUserMessage) {
// pending 是焦点,所有 lastMessages 是上下文
const pendingClean = cleanMessageText(pendingUserMessage);
if (pendingClean) {
const speaker = context.name1 || '用户';
focusEntry = {
text: `${speaker}${pendingClean}`,
charCount: pendingClean.length,
};
allCleanTexts.push(pendingClean);
}
for (const m of (lastMessages || [])) { for (const m of (lastMessages || [])) {
const speaker = m.is_user ? (context.name1 || '用户') : (m.name || context.name2 || '角色'); const entry = buildMessageEntry(m, context);
const clean = cleanMessageText(m.mes || ''); if (entry) {
contextEntries.push(entry);
allCleanTexts.push(cleanMessageText(m.mes));
}
}
} else {
// 无 pending → lastMessages[-1] 是焦点
const msgs = lastMessages || [];
if (clean) { if (msgs.length > 0) {
// 不使用楼层号embedding 模型不需要 const lastMsg = msgs[msgs.length - 1];
// 不截断,零暗箱 const entry = buildMessageEntry(lastMsg, context);
dialogueLines.push(`${speaker}: ${clean}`); if (entry) {
allCleanText.push(clean); focusEntry = entry;
allCleanTexts.push(cleanMessageText(lastMsg.mes));
} }
} }
// 3. 处理 pendingUserMessage for (let i = 0; i < msgs.length - 1; i++) {
let pendingClean = ''; const entry = buildMessageEntry(msgs[i], context);
if (pendingUserMessage) { if (entry) {
pendingClean = cleanMessageText(pendingUserMessage); contextEntries.push(entry);
if (pendingClean) { allCleanTexts.push(cleanMessageText(msgs[i].mes));
allCleanText.push(pendingClean); }
} }
} }
// 4. 提取焦点实体 // 3. 提取焦点实体
const combinedText = allCleanText.join(' '); const combinedText = allCleanTexts.join(' ');
const focusEntities = extractEntitiesFromText(combinedText, lexicon, displayMap); const focusEntities = extractEntitiesFromText(combinedText, lexicon, displayMap);
// 5. 构建 queryText_v0 // 4. 构建 querySegments
const queryParts = []; // 上下文在前oldest → newest焦点在末尾
// 上下文权重从 CONTEXT_BASE_WEIGHTS 尾部对齐分配
const querySegments = [];
if (focusEntities.length > 0) { for (let i = 0; i < contextEntries.length; i++) {
queryParts.push(`[ENTITIES]\n${focusEntities.join('\n')}`); const weightIdx = Math.max(0, CONTEXT_BASE_WEIGHTS.length - contextEntries.length + i);
querySegments.push({
text: contextEntries[i].text,
baseWeight: CONTEXT_BASE_WEIGHTS[weightIdx] || CONTEXT_BASE_WEIGHTS[0],
charCount: contextEntries[i].charCount,
});
} }
if (dialogueLines.length > 0) { if (focusEntry) {
queryParts.push(`[DIALOGUE]\n${dialogueLines.join('\n')}`); querySegments.push({
text: focusEntry.text,
baseWeight: FOCUS_BASE_WEIGHT,
charCount: focusEntry.charCount,
});
} }
if (pendingClean) { // 5. rerankQuery焦点在前纯自然语言无前缀
// 不截断,零暗箱 const contextLines = contextEntries.map(e => e.text);
queryParts.push(`[PENDING_USER]\n${pendingClean}`); const rerankQuery = focusEntry
} ? [focusEntry.text, ...contextLines].join('\n')
: contextLines.join('\n');
const queryText_v0 = queryParts.join('\n\n'); // 6. lexicalTerms实体优先 + 高频实词补充)
// 6. rerankQuery 独立构建(纯自然语言,供 reranker 使用)
const rerankQuery = buildRerankQuery(focusEntities, dialogueLines.length > 0 ? lastMessages : [], pendingUserMessage, context);
// 7. 构建 lexicalTerms
const entityTerms = focusEntities.map(e => e.toLowerCase()); const entityTerms = focusEntities.map(e => e.toLowerCase());
const textTerms = extractKeyTerms(combinedText); const textTerms = extractKeyTerms(combinedText);
// 合并去重:实体优先
const termSet = new Set(entityTerms); const termSet = new Set(entityTerms);
for (const t of textTerms) { for (const t of textTerms) {
if (termSet.size >= LEXICAL_TERMS_MAX) break; if (termSet.size >= LEXICAL_TERMS_MAX) break;
termSet.add(t); termSet.add(t);
} }
const lexicalTerms = Array.from(termSet);
return { return {
focusEntities, querySegments,
queryText_v0, hintsSegment: null,
queryText_v1: null,
rerankQuery, rerankQuery,
lexicalTerms, lexicalTerms: Array.from(termSet),
focusEntities,
_lexicon: lexicon, _lexicon: lexicon,
_displayMap: displayMap, _displayMap: displayMap,
}; };
} }
// ───────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────
// 阶段 3Query Refinement用第一轮召回结果增强 // 阶段 3Query Refinement用第一轮召回结果产出 hints 段
// ───────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────
/** /**
* 用第一轮召回结果增强 QueryBundle * 用第一轮召回结果增强 QueryBundle
* *
* 原地修改 bundle * 原地修改 bundle
* - queryText_v1 = queryText_v0 + [MEMORY_HINTS] * - hintsSegment填充 hints 段(供 R2 加权使用)
* - focusEntities 可能扩展(从 anchorHits 的 subject/object 中补充) * - focusEntities可能从 anchor hits 的 subject/object 中扩展
* - rerankQuery 追加 memory hints 关键词 * - lexicalTerms可能追加 hints 中的关键词
* - lexicalTerms 追加 memory hints 关键词 * - rerankQuery不变保持焦点优先的纯自然语言
* *
* @param {QueryBundle} bundle - 原始查询包 * @param {QueryBundle} bundle - 原始查询包
* @param {object[]} anchorHits - 第一轮 L0 命中(按相似度降序) * @param {object[]} anchorHits - 第一轮 L0 命中(按相似度降序)
@@ -267,10 +329,7 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
const topAnchors = (anchorHits || []).slice(0, MEMORY_HINT_ATOMS_MAX); const topAnchors = (anchorHits || []).slice(0, MEMORY_HINT_ATOMS_MAX);
for (const hit of topAnchors) { for (const hit of topAnchors) {
const semantic = hit.atom?.semantic || ''; const semantic = hit.atom?.semantic || '';
if (semantic) { if (semantic) hints.push(semantic);
// 不截断,零暗箱
hints.push(semantic);
}
} }
// 2. 从 top eventHits 提取 memory hints // 2. 从 top eventHits 提取 memory hints
@@ -282,18 +341,19 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
const line = title && summary const line = title && summary
? `${title}: ${summary}` ? `${title}: ${summary}`
: title || summary; : title || summary;
if (line) { if (line) hints.push(line);
// 不截断,零暗箱
hints.push(line);
}
} }
// 3. 构建 queryText_v1Hints 前置,最优先) // 3. 构建 hintsSegment
if (hints.length > 0) { if (hints.length > 0) {
const hintText = `[MEMORY_HINTS]\n${hints.join('\n')}`; const hintsText = hints.join('\n');
bundle.queryText_v1 = hintText + `\n\n` + bundle.queryText_v0; bundle.hintsSegment = {
text: hintsText,
baseWeight: HINTS_BASE_WEIGHT,
charCount: hintsText.length,
};
} else { } else {
bundle.queryText_v1 = bundle.queryText_v0; bundle.hintsSegment = null;
} }
// 4. 从 anchorHits 补充 focusEntities // 4. 从 anchorHits 补充 focusEntities
@@ -307,10 +367,13 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
const atom = hit.atom; const atom = hit.atom;
if (!atom) continue; if (!atom) continue;
// 检查 subject 和 object
for (const field of [atom.subject, atom.object]) { for (const field of [atom.subject, atom.object]) {
if (!field) continue; if (!field) continue;
const norm = String(field).normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim().toLowerCase(); const norm = String(field)
.normalize('NFKC')
.replace(/[\u200B-\u200D\uFEFF]/g, '')
.trim()
.toLowerCase();
if (norm.length >= 2 && lexicon.has(norm) && !existingSet.has(norm)) { if (norm.length >= 2 && lexicon.has(norm) && !existingSet.has(norm)) {
existingSet.add(norm); existingSet.add(norm);
const display = displayMap?.get(norm) || field; const display = displayMap?.get(norm) || field;
@@ -320,8 +383,9 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
} }
} }
// 5. rerankQuery 保持独立(不随 refinement 变更) // 5. rerankQuery 不变
// reranker 需要纯自然语言 query不受 memory hints 干扰 // cross-encoder 接收纯自然语言 query不受 hints 干扰
// 焦点消息始终在前,保证 reranker 内部截断时保留最关键内容
// 6. 增强 lexicalTerms // 6. 增强 lexicalTerms
if (hints.length > 0) { if (hints.length > 0) {

View File

@@ -1,15 +1,22 @@
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Recall Engine (v7 - Two-Stage: L0 Locate → L1 Evidence) // Story Summary - Recall Engine (v8 - Weighted Query Vectors + Floor Aggregation)
// //
// 命名规范: // 命名规范:
// - 存储层用 L0/L1/L2/L3StateAtom/Chunk/Event/Fact // - 存储层用 L0/L1/L2/L3StateAtom/Chunk/Event/Fact
// - 召回层用语义名称anchor/evidence/event/constraint // - 召回层用语义名称anchor/evidence/event/constraint
// //
// v7 → v8 变更:
// - Query 取 3 条消息(对齐 L0 对结构),加权向量合成替代文本拼接
// - R1 权重 [0.15, 0.30, 0.55](焦点 > 近上下文 > 远上下文)
// - R2 复用 R1 向量 + embed hints 1 条,权重 [0.10, 0.20, 0.45, 0.25]
// - Dense floor 聚合max → maxSim×0.6 + meanSim×0.4
// - Lexical floor 聚合max → maxScore × (1 + 0.3×log₂(hitCount))
//
// 架构: // 架构:
// 阶段 1: Query Build确定性无 LLM // 阶段 1: Query Build确定性无 LLM
// 阶段 2: Round 1 Dense RetrievalL0 + L2 // 阶段 2: Round 1 Dense Retrievalbatch embed 3 段 → 加权平均
// 阶段 3: Query Refinement用已命中记忆增强 // 阶段 3: Query Refinement用已命中记忆产出 hints 段
// 阶段 4: Round 2 Dense RetrievalL0 + L2 // 阶段 4: Round 2 Dense Retrieval复用 R1 vec + embed hints → 加权平均
// 阶段 5: Lexical Retrieval // 阶段 5: Lexical Retrieval
// 阶段 6: Floor W-RRF Fusion + Rerank + L1 配对 // 阶段 6: Floor W-RRF Fusion + Rerank + L1 配对
// 阶段 7: L1 配对组装L0 → top-1 AI L1 + top-1 USER L1 // 阶段 7: L1 配对组装L0 → top-1 AI L1 + top-1 USER L1
@@ -21,7 +28,14 @@ import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js';
import { getEngineFingerprint, embed } from '../utils/embedder.js'; import { getEngineFingerprint, embed } from '../utils/embedder.js';
import { xbLog } from '../../../../core/debug-core.js'; import { xbLog } from '../../../../core/debug-core.js';
import { getContext } from '../../../../../../../extensions.js'; import { getContext } from '../../../../../../../extensions.js';
import { buildQueryBundle, refineQueryBundle } from './query-builder.js'; import {
buildQueryBundle,
refineQueryBundle,
computeLengthFactor,
FOCUS_BASE_WEIGHT_R2,
CONTEXT_BASE_WEIGHTS_R2,
FOCUS_MIN_NORMALIZED_WEIGHT,
} from './query-builder.js';
import { getLexicalIndex, searchLexicalIndex } from './lexical-index.js'; import { getLexicalIndex, searchLexicalIndex } from './lexical-index.js';
import { rerankChunks } from '../llm/reranker.js'; import { rerankChunks } from '../llm/reranker.js';
import { createMetrics, calcSimilarityStats } from './metrics.js'; import { createMetrics, calcSimilarityStats } from './metrics.js';
@@ -33,8 +47,9 @@ const MODULE_ID = 'recall';
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
const CONFIG = { const CONFIG = {
// 窗口 // 窗口:取 3 条消息(对齐 L0 USER+AI 对结构)
LAST_MESSAGES_K: 2, LAST_MESSAGES_K: 3,
LAST_MESSAGES_K_WITH_PENDING: 2, // pending 存在时只取 2 条上下文,避免形成 4 段
// Anchor (L0 StateAtoms) // Anchor (L0 StateAtoms)
ANCHOR_MIN_SIMILARITY: 0.58, ANCHOR_MIN_SIMILARITY: 0.58,
@@ -51,6 +66,13 @@ const CONFIG = {
RRF_W_LEX: 0.9, RRF_W_LEX: 0.9,
FUSION_CAP: 60, FUSION_CAP: 60,
// Dense floor 聚合权重
DENSE_AGG_W_MAX: 0.6,
DENSE_AGG_W_MEAN: 0.4,
// Lexical floor 聚合密度加成
LEX_DENSITY_BONUS: 0.3,
// Rerankfloor-level // Rerankfloor-level
RERANK_TOP_N: 20, RERANK_TOP_N: 20,
RERANK_MIN_SCORE: 0.15, RERANK_MIN_SCORE: 0.15,
@@ -66,9 +88,6 @@ const CONFIG = {
/** /**
* 计算余弦相似度 * 计算余弦相似度
* @param {number[]} a
* @param {number[]} b
* @returns {number}
*/ */
function cosineSimilarity(a, b) { function cosineSimilarity(a, b) {
if (!a?.length || !b?.length || a.length !== b.length) return 0; if (!a?.length || !b?.length || a.length !== b.length) return 0;
@@ -83,8 +102,6 @@ function cosineSimilarity(a, b) {
/** /**
* 标准化字符串 * 标准化字符串
* @param {string} s
* @returns {string}
*/ */
function normalize(s) { function normalize(s) {
return String(s || '') return String(s || '')
@@ -96,12 +113,8 @@ function normalize(s) {
/** /**
* 获取最近消息 * 获取最近消息
* @param {object[]} chat
* @param {number} count
* @param {boolean} excludeLastAi
* @returns {object[]}
*/ */
function getLastMessages(chat, count = 2, excludeLastAi = false) { function getLastMessages(chat, count = 3, excludeLastAi = false) {
if (!chat?.length) return []; if (!chat?.length) return [];
let messages = [...chat]; let messages = [...chat];
if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) { if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) {
@@ -111,18 +124,128 @@ function getLastMessages(chat, count = 2, excludeLastAi = false) {
} }
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// MMR 选择算法 // 加权向量工具
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
/** /**
* Maximal Marginal Relevance 选择 * 多向量加权平均
* @param {object[]} candidates *
* @param {number} k * @param {number[][]} vectors - 向量数组
* @param {number} lambda * @param {number[]} weights - 归一化后的权重sum = 1
* @param {Function} getVector * @returns {number[]|null}
* @param {Function} getScore
* @returns {object[]}
*/ */
/**
 * Combines several vectors into one by a per-vector weighted sum.
 *
 * With weights that sum to 1 the result is the weighted average. Missing or
 * empty vectors are skipped; their weight is NOT redistributed to the others.
 *
 * @param {number[][]} vectors - Vectors to combine.
 * @param {number[]} weights - One weight per vector (expected to be normalized, sum = 1).
 * @returns {number[]|null} The combined vector, or null when either input is
 *   missing/empty or the two arrays differ in length.
 */
function weightedAverageVectors(vectors, weights) {
    if (!vectors?.length || !weights?.length || vectors.length !== weights.length) return null;

    // Take the dimensionality from the first usable vector instead of reading
    // vectors[0] unguarded: the loop below already tolerates missing entries,
    // so a null/empty first vector must neither throw nor zero out the result.
    const dims = vectors.find(v => v?.length)?.length ?? 0;
    const result = new Array(dims).fill(0);

    vectors.forEach((v, i) => {
        if (!v?.length) return;
        const w = weights[i];
        for (let d = 0; d < dims; d++) {
            result[d] += w * v[d];
        }
    });

    return result;
}
/**
 * Enforces a hard minimum share for one position of a normalized weight list.
 *
 * If the list is empty an empty array is returned. If the index is out of
 * range, or the target already holds at least `minWeight`, the input array
 * itself is returned unchanged. Otherwise the target is raised to `minWeight`
 * and every other weight is scaled down proportionally.
 *
 * @param {number[]} weights - Normalized weights (sum ≈ 1).
 * @param {number} targetIdx - Index of the protected position (the focus segment).
 * @param {number} minWeight - Minimum share for that position (0~1).
 * @returns {number[]} The adjusted normalized weights.
 */
function clampMinNormalizedWeight(weights, targetIdx, minWeight) {
    if (!weights?.length) return [];
    if (targetIdx < 0 || targetIdx >= weights.length) return weights;

    const targetWeight = weights[targetIdx];
    if (targetWeight >= minWeight) return weights;

    // Share currently held by everything except the target (input is assumed normalized).
    const restTotal = 1 - targetWeight;
    if (restTotal <= 0) {
        // Nothing left to scale down: the target takes the whole mass.
        const allOnTarget = Array.from({ length: weights.length }, () => 0);
        allOnTarget[targetIdx] = 1;
        return allOnTarget;
    }

    const shrink = (1 - minWeight) / restTotal;
    const rebalanced = [];
    let total = 0;
    for (let i = 0; i < weights.length; i++) {
        const value = i === targetIdx ? minWeight : weights[i] * shrink;
        rebalanced.push(value);
        total += value;
    }

    // Numerical stability: fold the floating-point residue into the target slot.
    rebalanced[targetIdx] += 1 - total;
    return rebalanced;
}
/**
 * Computes the Round 1 (R1) segment weights: baseWeight × lengthFactor,
 * normalized, with a minimum share enforced for the focus segment.
 *
 * If the adjusted weights sum to zero or less, a uniform distribution is
 * used before the focus clamp is applied.
 *
 * @param {object[]} segments - QuerySegment[]; each entry provides `baseWeight` and `charCount`.
 * @returns {number[]} Normalized weights, one per segment (empty for no segments).
 */
function computeSegmentWeights(segments) {
    if (!segments?.length) return [];

    const raw = [];
    let total = 0;
    for (const seg of segments) {
        const weight = seg.baseWeight * computeLengthFactor(seg.charCount);
        raw.push(weight);
        total += weight;
    }

    let normalized;
    if (total <= 0) {
        normalized = segments.map(() => 1 / segments.length);
    } else {
        normalized = raw.map(weight => weight / total);
    }

    // The focus segment is always the last one.
    const focusIdx = segments.length - 1;
    return clampMinNormalizedWeight(normalized, focusIdx, FOCUS_MIN_NORMALIZED_WEIGHT);
}
/**
 * Computes the Round 2 (R2) weights: the R1 segments re-weighted with the R2
 * base weights, plus an optional hints segment, then normalized with a
 * minimum share enforced for the focus segment.
 *
 * @param {object[]} segments - QuerySegment[] (the same segments used for R1).
 * @param {object|null} hintsSegment - { text, baseWeight, charCount }, or null when there are no hints.
 * @returns {number[]} Normalized weights; length = segments.length + (hints ? 1 : 0).
 */
function computeR2Weights(segments, hintsSegment) {
    if (!segments?.length) return [];

    // The focus segment is the last of `segments`; everything before it is context.
    const focusIdx = segments.length - 1;
    const tableSize = CONTEXT_BASE_WEIGHTS_R2.length;

    const adjusted = segments.map((seg, i) => {
        let base;
        if (i === focusIdx) {
            base = FOCUS_BASE_WEIGHT_R2;
        } else {
            // Tail-align context segments against the R2 base-weight table.
            const weightIdx = Math.max(0, tableSize - focusIdx + i);
            base = CONTEXT_BASE_WEIGHTS_R2[weightIdx] || CONTEXT_BASE_WEIGHTS_R2[0];
        }
        return base * computeLengthFactor(seg.charCount);
    });

    // Hints, when present, are appended after the focus segment.
    if (hintsSegment) {
        adjusted.push(hintsSegment.baseWeight * computeLengthFactor(hintsSegment.charCount));
    }

    let total = 0;
    for (const weight of adjusted) {
        total += weight;
    }

    let normalized;
    if (total <= 0) {
        normalized = adjusted.map(() => 1 / adjusted.length);
    } else {
        normalized = adjusted.map(weight => weight / total);
    }

    // In R2 the focus position stays fixed at the last entry of `segments`.
    return clampMinNormalizedWeight(normalized, focusIdx, FOCUS_MIN_NORMALIZED_WEIGHT);
}
// ═══════════════════════════════════════════════════════════════════════════
// MMR 选择算法
// ═══════════════════════════════════════════════════════════════════════════
function mmrSelect(candidates, k, lambda, getVector, getScore) { function mmrSelect(candidates, k, lambda, getVector, getScore) {
const selected = []; const selected = [];
const ids = new Set(); const ids = new Set();
@@ -166,13 +289,6 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
// [Anchors] L0 StateAtoms 检索 // [Anchors] L0 StateAtoms 检索
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
/**
* 检索语义锚点
* @param {number[]} queryVector
* @param {object} vectorConfig
* @param {object|null} metrics
* @returns {Promise<{hits: object[], floors: Set<number>}>}
*/
async function recallAnchors(queryVector, vectorConfig, metrics) { async function recallAnchors(queryVector, vectorConfig, metrics) {
const { chatId } = getContext(); const { chatId } = getContext();
if (!chatId || !queryVector?.length) { if (!chatId || !queryVector?.length) {
@@ -228,15 +344,6 @@ async function recallAnchors(queryVector, vectorConfig, metrics) {
// [Events] L2 Events 检索 // [Events] L2 Events 检索
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
/**
* 检索事件
* @param {number[]} queryVector
* @param {object[]} allEvents
* @param {object} vectorConfig
* @param {string[]} focusEntities
* @param {object|null} metrics
* @returns {Promise<object[]>}
*/
async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities, metrics) { async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities, metrics) {
const { chatId } = getContext(); const { chatId } = getContext();
if (!chatId || !queryVector?.length || !allEvents?.length) { if (!chatId || !queryVector?.length || !allEvents?.length) {
@@ -344,11 +451,6 @@ async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities,
// [Causation] 因果链追溯 // [Causation] 因果链追溯
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
/**
* 构建事件索引
* @param {object[]} allEvents
* @returns {Map<string, object>}
*/
function buildEventIndex(allEvents) { function buildEventIndex(allEvents) {
const map = new Map(); const map = new Map();
for (const e of allEvents || []) { for (const e of allEvents || []) {
@@ -357,13 +459,6 @@ function buildEventIndex(allEvents) {
return map; return map;
} }
/**
* 追溯因果链
* @param {object[]} eventHits
* @param {Map<string, object>} eventIndex
* @param {number} maxDepth
* @returns {{results: object[], maxDepth: number}}
*/
function traceCausation(eventHits, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) { function traceCausation(eventHits, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) {
const out = new Map(); const out = new Map();
const idRe = /^evt-\d+$/; const idRe = /^evt-\d+$/;
@@ -411,23 +506,9 @@ function traceCausation(eventHits, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MA
} }
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// [W-RRF] 加权倒数排名融合(L0-only // [W-RRF] 加权倒数排名融合(floor 粒度
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
/**
* @typedef {object} RankedItem
* @property {string} id - 唯一标识符
* @property {number} score - 该路的原始分数
*/
/**
* W-RRF 加权倒数排名融合floor 粒度)
*
* @param {{id: number, score: number}[]} denseRank - Dense 路floor → max cosine降序
* @param {{id: number, score: number}[]} lexRank - Lexical 路floor → max bm25降序
* @param {number} cap - 输出上限
* @returns {{top: {id: number, fusionScore: number}[], totalUnique: number}}
*/
function fuseByFloor(denseRank, lexRank, cap = CONFIG.FUSION_CAP) { function fuseByFloor(denseRank, lexRank, cap = CONFIG.FUSION_CAP) {
const k = CONFIG.RRF_K; const k = CONFIG.RRF_K;
const wD = CONFIG.RRF_W_DENSE; const wD = CONFIG.RRF_W_DENSE;
@@ -464,16 +545,6 @@ function fuseByFloor(denseRank, lexRank, cap = CONFIG.FUSION_CAP) {
// [Stage 6] Floor 融合 + Rerank + L1 配对 // [Stage 6] Floor 融合 + Rerank + L1 配对
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
/**
* Floor 粒度融合 + Rerank + L1 配对
*
* @param {object[]} anchorHits - L0 dense 命中Round 2
* @param {number[]} queryVector - 查询向量v1
* @param {string} rerankQuery - rerank 查询文本(纯自然语言)
* @param {object} lexicalResult - 词法检索结果
* @param {object} metrics
* @returns {Promise<{l0Selected: object[], l1ByFloor: Map<number, {aiTop1: object|null, userTop1: object|null}>}>}
*/
async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexicalResult, metrics) { async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexicalResult, metrics) {
const { chatId, chat, name1, name2 } = getContext(); const { chatId, chat, name1, name2 } = getContext();
if (!chatId) return { l0Selected: [], l1ByFloor: new Map() }; if (!chatId) return { l0Selected: [], l1ByFloor: new Map() };
@@ -481,26 +552,36 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
const T_Start = performance.now(); const T_Start = performance.now();
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────
// 6a. Dense floor rank每个 floor 取 max cosine // 6a. Dense floor rank加权聚合maxSim×0.6 + meanSim×0.4
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────
const denseFloorMap = new Map(); const denseFloorAgg = new Map();
for (const a of (anchorHits || [])) { for (const a of (anchorHits || [])) {
const cur = denseFloorMap.get(a.floor) || 0; const cur = denseFloorAgg.get(a.floor);
if (a.similarity > cur) denseFloorMap.set(a.floor, a.similarity); if (!cur) {
denseFloorAgg.set(a.floor, { maxSim: a.similarity, hitCount: 1, sumSim: a.similarity });
} else {
cur.maxSim = Math.max(cur.maxSim, a.similarity);
cur.hitCount++;
cur.sumSim += a.similarity;
}
} }
const denseFloorRank = [...denseFloorMap.entries()] const denseFloorRank = [...denseFloorAgg.entries()]
.sort((a, b) => b[1] - a[1]) .map(([floor, info]) => ({
.map(([floor, score]) => ({ id: floor, score })); id: floor,
score: info.maxSim * CONFIG.DENSE_AGG_W_MAX
+ (info.sumSim / info.hitCount) * CONFIG.DENSE_AGG_W_MEAN,
}))
.sort((a, b) => b.score - a.score);
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────
// 6b. Lexical floor rankchunkScores → floor 聚合 + USER→AI 映射 + 预过滤 // 6b. Lexical floor rank密度加成maxScore × (1 + 0.3×log₂(hitCount))
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────
const atomFloorSet = new Set(getStateAtoms().map(a => a.floor)); const atomFloorSet = new Set(getStateAtoms().map(a => a.floor));
const lexFloorScores = new Map(); const lexFloorAgg = new Map();
for (const { chunkId, score } of (lexicalResult?.chunkScores || [])) { for (const { chunkId, score } of (lexicalResult?.chunkScores || [])) {
const match = chunkId?.match(/^c-(\d+)-/); const match = chunkId?.match(/^c-(\d+)-/);
if (!match) continue; if (!match) continue;
@@ -519,13 +600,21 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
// 预过滤:必须有 L0 atoms // 预过滤:必须有 L0 atoms
if (!atomFloorSet.has(floor)) continue; if (!atomFloorSet.has(floor)) continue;
const cur = lexFloorScores.get(floor) || 0; const cur = lexFloorAgg.get(floor);
if (score > cur) lexFloorScores.set(floor, score); if (!cur) {
lexFloorAgg.set(floor, { maxScore: score, hitCount: 1 });
} else {
cur.maxScore = Math.max(cur.maxScore, score);
cur.hitCount++;
}
} }
const lexFloorRank = [...lexFloorScores.entries()] const lexFloorRank = [...lexFloorAgg.entries()]
.sort((a, b) => b[1] - a[1]) .map(([floor, info]) => ({
.map(([floor, score]) => ({ id: floor, score })); id: floor,
score: info.maxScore * (1 + CONFIG.LEX_DENSITY_BONUS * Math.log2(Math.max(1, info.hitCount))),
}))
.sort((a, b) => b.score - a.score);
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────
// 6c. Floor W-RRF 融合 // 6c. Floor W-RRF 融合
@@ -541,6 +630,8 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
metrics.fusion.totalUnique = totalUnique; metrics.fusion.totalUnique = totalUnique;
metrics.fusion.afterCap = fusedFloors.length; metrics.fusion.afterCap = fusedFloors.length;
metrics.fusion.time = fusionTime; metrics.fusion.time = fusionTime;
metrics.fusion.denseAggMethod = `max×${CONFIG.DENSE_AGG_W_MAX}+mean×${CONFIG.DENSE_AGG_W_MEAN}`;
metrics.fusion.lexDensityBonus = CONFIG.LEX_DENSITY_BONUS;
metrics.evidence.floorCandidates = fusedFloors.length; metrics.evidence.floorCandidates = fusedFloors.length;
} }
@@ -617,7 +708,7 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
} }
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────
// 6f. 并发 Rerank // 6f. Rerank
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────
const T_Rerank_Start = performance.now(); const T_Rerank_Start = performance.now();
@@ -647,7 +738,6 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
}; };
} }
// document 平均长度
if (rerankCandidates.length > 0) { if (rerankCandidates.length > 0) {
const totalLen = rerankCandidates.reduce((s, c) => s + (c.text?.length || 0), 0); const totalLen = rerankCandidates.reduce((s, c) => s + (c.text?.length || 0), 0);
metrics.evidence.rerankDocAvgLength = Math.round(totalLen / rerankCandidates.length); metrics.evidence.rerankDocAvgLength = Math.round(totalLen / rerankCandidates.length);
@@ -666,6 +756,13 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
atomsByFloor.get(atom.floor).push(atom); atomsByFloor.get(atom.floor).push(atom);
} }
// 重建 denseFloorMap 以获取每层 max cosine用于 L0 similarity 标注)
const denseFloorMaxMap = new Map();
for (const a of (anchorHits || [])) {
const cur = denseFloorMaxMap.get(a.floor) || 0;
if (a.similarity > cur) denseFloorMaxMap.set(a.floor, a.similarity);
}
const l0Selected = []; const l0Selected = [];
const l1ByFloor = new Map(); const l1ByFloor = new Map();
let contextPairsAdded = 0; let contextPairsAdded = 0;
@@ -673,9 +770,9 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
for (const item of reranked) { for (const item of reranked) {
const floor = item.floor; const floor = item.floor;
const rerankScore = item._rerankScore || 0; const rerankScore = item._rerankScore || 0;
const denseSim = denseFloorMap.get(floor) || 0; const denseSim = denseFloorMaxMap.get(floor) || 0;
// 收集该 floor 所有 L0 atoms,共享 floor 的 rerankScore // 收集该 floor 所有 L0 atoms
const floorAtoms = atomsByFloor.get(floor) || []; const floorAtoms = atomsByFloor.get(floor) || [];
for (const atom of floorAtoms) { for (const atom of floorAtoms) {
l0Selected.push({ l0Selected.push({
@@ -735,22 +832,14 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
return { l0Selected, l1ByFloor }; return { l0Selected, l1ByFloor };
} }
// [L1] 拉取 + Cosine 打分(并发子任务)
// ═══════════════════════════════════════════════════════════════════════════
// [L1] 拉取 + Cosine 打分
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
/**
* 从 IndexedDB 拉取指定楼层的 L1 chunks + 向量,用 queryVector cosine 打分
*
* @param {string} chatId
* @param {number[]} floors - 需要拉取的楼层列表
* @param {number[]} queryVector - 查询向量v1
* @param {object[]} chat - 聊天消息数组
* @returns {Promise<Map<number, object[]>>} floor → scored chunks带 _cosineScore
*/
async function pullAndScoreL1(chatId, floors, queryVector, chat) { async function pullAndScoreL1(chatId, floors, queryVector, chat) {
const T0 = performance.now(); const T0 = performance.now();
/** @type {Map<number, object[]>} */
const result = new Map(); const result = new Map();
if (!chatId || !floors?.length || !queryVector?.length) { if (!chatId || !floors?.length || !queryVector?.length) {
@@ -758,7 +847,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) {
return result; return result;
} }
// 拉取 chunks
let dbChunks = []; let dbChunks = [];
try { try {
dbChunks = await getChunksByFloors(chatId, floors); dbChunks = await getChunksByFloors(chatId, floors);
@@ -773,7 +861,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) {
return result; return result;
} }
// 拉取向量
const chunkIds = dbChunks.map(c => c.chunkId); const chunkIds = dbChunks.map(c => c.chunkId);
let chunkVectors = []; let chunkVectors = [];
try { try {
@@ -786,7 +873,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) {
const vectorMap = new Map(chunkVectors.map(v => [v.chunkId, v.vector])); const vectorMap = new Map(chunkVectors.map(v => [v.chunkId, v.vector]));
// Cosine 打分 + 按楼层分组
for (const chunk of dbChunks) { for (const chunk of dbChunks) {
const vec = vectorMap.get(chunk.chunkId); const vec = vectorMap.get(chunk.chunkId);
const cosineScore = vec?.length ? cosineSimilarity(queryVector, vec) : 0; const cosineScore = vec?.length ? cosineSimilarity(queryVector, vec) : 0;
@@ -807,7 +893,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) {
result.get(chunk.floor).push(scored); result.get(chunk.floor).push(scored);
} }
// 每楼层按 cosine 降序排序
for (const [, chunks] of result) { for (const [, chunks] of result) {
chunks.sort((a, b) => b._cosineScore - a._cosineScore); chunks.sort((a, b) => b._cosineScore - a._cosineScore);
} }
@@ -825,16 +910,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) {
// 主函数 // 主函数
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
/**
* 执行记忆召回
*
* @param {object[]} allEvents - 所有事件L2
* @param {object} vectorConfig - 向量配置
* @param {object} options
* @param {boolean} options.excludeLastAi
* @param {string|null} options.pendingUserMessage
* @returns {Promise<object>}
*/
export async function recallMemory(allEvents, vectorConfig, options = {}) { export async function recallMemory(allEvents, vectorConfig, options = {}) {
const T0 = performance.now(); const T0 = performance.now();
const { chat } = getContext(); const { chat } = getContext();
@@ -865,7 +940,10 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
const T_Build_Start = performance.now(); const T_Build_Start = performance.now();
const lastMessages = getLastMessages(chat, CONFIG.LAST_MESSAGES_K, excludeLastAi); const lastMessagesCount = pendingUserMessage
? CONFIG.LAST_MESSAGES_K_WITH_PENDING
: CONFIG.LAST_MESSAGES_K;
const lastMessages = getLastMessages(chat, lastMessagesCount, excludeLastAi);
const bundle = buildQueryBundle(lastMessages, pendingUserMessage); const bundle = buildQueryBundle(lastMessages, pendingUserMessage);
@@ -873,29 +951,39 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
metrics.anchor.focusEntities = bundle.focusEntities; metrics.anchor.focusEntities = bundle.focusEntities;
if (metrics.query?.lengths) { if (metrics.query?.lengths) {
metrics.query.lengths.v0Chars = String(bundle.queryText_v0 || '').length; metrics.query.lengths.v0Chars = bundle.querySegments.reduce((sum, s) => sum + s.text.length, 0);
metrics.query.lengths.v1Chars = null; metrics.query.lengths.v1Chars = null;
metrics.query.lengths.rerankChars = String(bundle.rerankQuery || bundle.queryText_v0 || '').length; metrics.query.lengths.rerankChars = String(bundle.rerankQuery || '').length;
} }
xbLog.info(MODULE_ID, xbLog.info(MODULE_ID,
`Query Build: focus=[${bundle.focusEntities.join(',')}] lexTerms=[${bundle.lexicalTerms.slice(0, 5).join(',')}]` `Query Build: focus=[${bundle.focusEntities.join(',')}] segments=${bundle.querySegments.length} lexTerms=[${bundle.lexicalTerms.slice(0, 5).join(',')}]`
); );
// ═══════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════
// 阶段 2: Round 1 Dense Retrieval // 阶段 2: Round 1 Dense Retrievalbatch embed → 加权平均)
// ═══════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════
let queryVector_v0; const segmentTexts = bundle.querySegments.map(s => s.text);
if (!segmentTexts.length) {
metrics.timing.total = Math.round(performance.now() - T0);
return {
events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
focusEntities: bundle.focusEntities,
elapsed: metrics.timing.total,
logText: 'No query segments.',
metrics,
};
}
let r1Vectors;
try { try {
const [vec] = await embed([bundle.queryText_v0], vectorConfig, { timeout: 10000 }); r1Vectors = await embed(segmentTexts, vectorConfig, { timeout: 10000 });
queryVector_v0 = vec;
} catch (e1) { } catch (e1) {
xbLog.warn(MODULE_ID, 'Round 1 向量化失败500ms 后重试', e1); xbLog.warn(MODULE_ID, 'Round 1 向量化失败500ms 后重试', e1);
await new Promise(r => setTimeout(r, 500)); await new Promise(r => setTimeout(r, 500));
try { try {
const [vec] = await embed([bundle.queryText_v0], vectorConfig, { timeout: 15000 }); r1Vectors = await embed(segmentTexts, vectorConfig, { timeout: 15000 });
queryVector_v0 = vec;
} catch (e2) { } catch (e2) {
xbLog.error(MODULE_ID, 'Round 1 向量化重试仍失败', e2); xbLog.error(MODULE_ID, 'Round 1 向量化重试仍失败', e2);
metrics.timing.total = Math.round(performance.now() - T0); metrics.timing.total = Math.round(performance.now() - T0);
@@ -909,13 +997,31 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
} }
} }
if (!r1Vectors?.length || r1Vectors.some(v => !v?.length)) {
metrics.timing.total = Math.round(performance.now() - T0);
return {
events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
focusEntities: bundle.focusEntities,
elapsed: metrics.timing.total,
logText: 'Empty query vectors (round 1).',
metrics,
};
}
const r1Weights = computeSegmentWeights(bundle.querySegments);
const queryVector_v0 = weightedAverageVectors(r1Vectors, r1Weights);
if (metrics) {
metrics.query.segmentWeights = r1Weights.map(w => Number(w.toFixed(3)));
}
if (!queryVector_v0?.length) { if (!queryVector_v0?.length) {
metrics.timing.total = Math.round(performance.now() - T0); metrics.timing.total = Math.round(performance.now() - T0);
return { return {
events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [], events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
focusEntities: bundle.focusEntities, focusEntities: bundle.focusEntities,
elapsed: metrics.timing.total, elapsed: metrics.timing.total,
logText: 'Empty query vector (round 1).', logText: 'Weighted average produced empty vector.',
metrics, metrics,
}; };
} }
@@ -929,7 +1035,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
const r1EventTime = Math.round(performance.now() - T_R1_Event_Start); const r1EventTime = Math.round(performance.now() - T_R1_Event_Start);
xbLog.info(MODULE_ID, xbLog.info(MODULE_ID,
`Round 1: anchors=${anchorHits_v0.length} events=${eventHits_v0.length} (anchor=${r1AnchorTime}ms event=${r1EventTime}ms)` `Round 1: anchors=${anchorHits_v0.length} events=${eventHits_v0.length} weights=[${r1Weights.map(w => w.toFixed(2)).join(',')}] (anchor=${r1AnchorTime}ms event=${r1EventTime}ms)`
); );
// ═══════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════
@@ -943,27 +1049,44 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
metrics.query.refineTime = Math.round(performance.now() - T_Refine_Start); metrics.query.refineTime = Math.round(performance.now() - T_Refine_Start);
metrics.anchor.focusEntities = bundle.focusEntities; metrics.anchor.focusEntities = bundle.focusEntities;
if (metrics.query?.lengths) { // 更新 v1 长度指标
metrics.query.lengths.v1Chars = bundle.queryText_v1 == null ? null : String(bundle.queryText_v1).length; if (metrics.query?.lengths && bundle.hintsSegment) {
metrics.query.lengths.rerankChars = String(bundle.rerankQuery || bundle.queryText_v1 || bundle.queryText_v0 || '').length; metrics.query.lengths.v1Chars = metrics.query.lengths.v0Chars + bundle.hintsSegment.text.length;
} }
xbLog.info(MODULE_ID, xbLog.info(MODULE_ID,
`Refinement: focus=[${bundle.focusEntities.join(',')}] hasV1=${!!bundle.queryText_v1} (${metrics.query.refineTime}ms)` `Refinement: focus=[${bundle.focusEntities.join(',')}] hasHints=${!!bundle.hintsSegment} (${metrics.query.refineTime}ms)`
); );
// ═══════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════
// 阶段 4: Round 2 Dense Retrieval // 阶段 4: Round 2 Dense Retrieval(复用 R1 向量 + embed hints
// ═══════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════
const queryTextFinal = bundle.queryText_v1 || bundle.queryText_v0;
let queryVector_v1; let queryVector_v1;
if (bundle.hintsSegment) {
try { try {
const [vec] = await embed([queryTextFinal], vectorConfig, { timeout: 10000 }); const [hintsVec] = await embed([bundle.hintsSegment.text], vectorConfig, { timeout: 10000 });
queryVector_v1 = vec;
if (hintsVec?.length) {
const r2Weights = computeR2Weights(bundle.querySegments, bundle.hintsSegment);
queryVector_v1 = weightedAverageVectors([...r1Vectors, hintsVec], r2Weights);
if (metrics) {
metrics.query.r2Weights = r2Weights.map(w => Number(w.toFixed(3)));
}
xbLog.info(MODULE_ID,
`Round 2 weights: [${r2Weights.map(w => w.toFixed(2)).join(',')}]`
);
} else {
queryVector_v1 = queryVector_v0;
}
} catch (e) { } catch (e) {
xbLog.warn(MODULE_ID, 'Round 2 向量化失败,降级使用 Round 1 向量', e); xbLog.warn(MODULE_ID, 'Round 2 hints 向量化失败,降级使用 Round 1 向量', e);
queryVector_v1 = queryVector_v0;
}
} else {
queryVector_v1 = queryVector_v0; queryVector_v1 = queryVector_v0;
} }
@@ -1082,13 +1205,14 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
metrics.event.entityNames = bundle.focusEntities; metrics.event.entityNames = bundle.focusEntities;
metrics.event.entitiesUsed = bundle.focusEntities.length; metrics.event.entitiesUsed = bundle.focusEntities.length;
console.group('%c[Recall v7]', 'color: #7c3aed; font-weight: bold'); console.group('%c[Recall v8]', 'color: #7c3aed; font-weight: bold');
console.log(`Total: ${metrics.timing.total}ms`); console.log(`Total: ${metrics.timing.total}ms`);
console.log(`Query Build: ${metrics.query.buildTime}ms | Refine: ${metrics.query.refineTime}ms`); console.log(`Query Build: ${metrics.query.buildTime}ms | Refine: ${metrics.query.refineTime}ms`);
console.log(`R1 weights: [${r1Weights.map(w => w.toFixed(2)).join(', ')}]`);
console.log(`Focus: [${bundle.focusEntities.join(', ')}]`); console.log(`Focus: [${bundle.focusEntities.join(', ')}]`);
console.log(`Round 2 Anchors: ${anchorHits.length} hits → ${anchorFloors_dense.size} floors`); console.log(`Round 2 Anchors: ${anchorHits.length} hits → ${anchorFloors_dense.size} floors`);
console.log(`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length}`); console.log(`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length}`);
console.log(`Fusion (floor): dense=${metrics.fusion.denseFloors} lex=${metrics.fusion.lexFloors} → cap=${metrics.fusion.afterCap} (${metrics.fusion.time}ms)`); console.log(`Fusion (floor, weighted): dense=${metrics.fusion.denseFloors} lex=${metrics.fusion.lexFloors} → cap=${metrics.fusion.afterCap} (${metrics.fusion.time}ms)`);
console.log(`Floor Rerank: ${metrics.evidence.beforeRerank || 0}${metrics.evidence.floorsSelected || 0} floors → L0=${metrics.evidence.l0Collected || 0} (${metrics.evidence.rerankTime || 0}ms)`); console.log(`Floor Rerank: ${metrics.evidence.beforeRerank || 0}${metrics.evidence.floorsSelected || 0} floors → L0=${metrics.evidence.l0Collected || 0} (${metrics.evidence.rerankTime || 0}ms)`);
console.log(`L1: ${metrics.evidence.l1Pulled || 0} pulled → ${metrics.evidence.l1Attached || 0} attached (${metrics.evidence.l1CosineTime || 0}ms)`); console.log(`L1: ${metrics.evidence.l1Pulled || 0} pulled → ${metrics.evidence.l1Attached || 0} attached (${metrics.evidence.l1CosineTime || 0}ms)`);
console.log(`Events: ${eventHits.length} hits, ${causalChain.length} causal`); console.log(`Events: ${eventHits.length} hits, ${causalChain.length} causal`);