// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Recall Engine (v4 - uncapped L0 + L1 coarse filter)
// ═══════════════════════════════════════════════════════════════════════════

import { getAllEventVectors, getChunksByFloors, getMeta, getChunkVectorsByIds } from '../storage/chunk-store.js';
import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js';
import { getEngineFingerprint, embed } from '../utils/embedder.js';
import { xbLog } from '../../../../core/debug-core.js';
import { getContext } from '../../../../../../../extensions.js';
import { filterText } from '../utils/text-filter.js';
import { expandQueryCached, buildSearchText } from '../llm/query-expansion.js';
import { rerankChunks } from '../llm/reranker.js';
import { createMetrics, calcSimilarityStats } from './metrics.js';

const MODULE_ID = 'recall';

// ═══════════════════════════════════════════════════════════════════════════
// Configuration
// ═══════════════════════════════════════════════════════════════════════════

const CONFIG = {
    // Query expansion: passed as `timeout` to expandQueryCached
    // (presumably milliseconds - confirm against the query-expansion module).
    QUERY_EXPANSION_TIMEOUT: 6000,

    // L0: no hard cap on the number of matches; a raised similarity threshold filters instead.
    L0_MIN_SIMILARITY: 0.58,

    // L1 coarse filter: maximum number of real chunks kept after vector pre-ranking.
    L1_MAX_CANDIDATES: 100,

    // L2 (event) retrieval.
    L2_CANDIDATE_MAX: 100,
    L2_SELECT_MAX: 50,
    L2_MIN_SIMILARITY: 0.55,
    L2_MMR_LAMBDA: 0.72,

    // Rerank: only runs when the merged chunk count exceeds RERANK_THRESHOLD;
    // TOP_N / MIN_SCORE are forwarded to rerankChunks as topN / minScore.
    RERANK_THRESHOLD: 80,
    RERANK_TOP_N: 50,
    RERANK_MIN_SCORE: 0.15,

    // Causal chain: default recursion depth limit and cap on returned ancestors.
    CAUSAL_CHAIN_MAX_DEPTH: 10,
    CAUSAL_INJECT_MAX: 30,
};

// ═══════════════════════════════════════════════════════════════════════════
// Utility functions
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Cosine similarity of two numeric vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} The cosine similarity, or 0 when either vector is missing/empty,
 *   the lengths differ, or either vector has zero norm.
 */
function cosineSimilarity(a, b) {
    if (!a?.length || !b?.length || a.length !== b.length) return 0;
    let dot = 0;
    let nA = 0;
    let nB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        nA += a[i] * a[i];
        nB += b[i] * b[i];
    }
    return nA && nB ? dot / (Math.sqrt(nA) * Math.sqrt(nB)) : 0;
}

/**
 * Canonicalizes a name for comparison: NFKC normalization, removal of zero-width
 * characters (U+200B-U+200D, U+FEFF), trimming and lower-casing.
 *
 * @param {*} s - Any value; falsy values become the empty string.
 * @returns {string}
 */
function normalize(s) {
    return String(s || '')
        .normalize('NFKC')
        .replace(/[\u200B-\u200D\uFEFF]/g, '')
        .trim()
        .toLowerCase();
}

/**
 * Runs the shared text filter over a message and strips `[tts:...]` tags.
 *
 * @param {string} text
 * @returns {string} Filtered, trimmed text.
 */
function cleanForRecall(text) {
    return filterText(text).replace(/\[tts:[^\]]*\]/gi, '').trim();
}

/**
 * Sanitizes the focus entity list and removes the user's own name from it.
 *
 * Entries are always stringified, trimmed and stripped of blanks - including when no
 * user name is available - so downstream consumers never see empty entities.
 *
 * @param {Array<*>} focusEntities - Entity names produced by query expansion.
 * @param {string} userName - The user's display name; compared after normalize().
 * @returns {string[]} Cleaned entity names without the user name.
 */
function removeUserNameFromFocus(focusEntities, userName) {
    const cleaned = (Array.isArray(focusEntities) ? focusEntities : [])
        .map(e => String(e || '').trim())
        .filter(Boolean);

    const u = normalize(userName);
    if (!u) return cleaned;

    return cleaned.filter(e => normalize(e) !== u);
}

/**
 * Builds the text query handed to the reranker.
 *
 * Concatenates, one part per line: the focus entities, up to three expanded queries,
 * the last two messages (150 chars each) and the pending user message (200 chars).
 * The joined result is capped at 1500 characters.
 *
 * @param {{focus?: string[], queries?: string[]}} expansion
 * @param {Array<{mes?: string}>} lastMessages
 * @param {string|null} pendingUserMessage
 * @returns {string}
 */
function buildRerankQuery(expansion, lastMessages, pendingUserMessage) {
    const parts = [];

    if (expansion?.focus?.length) {
        parts.push(expansion.focus.join(' '));
    }

    if (expansion?.queries?.length) {
        parts.push(...expansion.queries.slice(0, 3));
    }

    const recentTexts = (lastMessages || [])
        .slice(-2)
        .map(m => cleanForRecall(m.mes || '').slice(0, 150))
        .filter(Boolean);

    if (recentTexts.length) {
        parts.push(...recentTexts);
    }

    if (pendingUserMessage) {
        parts.push(cleanForRecall(pendingUserMessage).slice(0, 200));
    }

    return parts.filter(Boolean).join('\n').slice(0, 1500);
}

// ═══════════════════════════════════════════════════════════════════════════
// MMR selection
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Greedy Maximal Marginal Relevance selection.
 *
 * Each round picks the not-yet-selected candidate maximizing
 * `lambda * relevance - (1 - lambda) * maxSimilarityToSelected`, where the similarity
 * term is floored at 0. Ties keep the earliest candidate in input order. Candidates
 * sharing an `_id` with an already-selected item are skipped.
 *
 * The max-similarity term is maintained incrementally (one pass against the newly
 * selected item per round) instead of being recomputed against every selected item,
 * which yields the same picks with O(k * n) similarity computations.
 *
 * @param {Array<{_id: *}>} candidates - Candidate items; not mutated.
 * @param {number} k - Maximum number of items to select.
 * @param {number} lambda - Relevance/diversity trade-off weight.
 * @param {(c: object) => ArrayLike<number>|undefined} getVector
 * @param {(c: object) => number} getScore
 * @returns {object[]} Selected candidates in selection order.
 */
function mmrSelect(candidates, k, lambda, getVector, getScore) {
    const pool = Array.isArray(candidates) ? candidates : [];
    const selected = [];
    const ids = new Set();

    // maxSim[i] = highest similarity between pool[i] and any selected item (never below 0).
    const maxSim = new Array(pool.length).fill(0);

    while (selected.length < k) {
        let bestIdx = -1;
        let bestScore = -Infinity;

        for (let i = 0; i < pool.length; i++) {
            const c = pool[i];
            if (ids.has(c._id)) continue;

            const score = lambda * getScore(c) - (1 - lambda) * maxSim[i];
            if (score > bestScore) {
                bestScore = score;
                bestIdx = i;
            }
        }

        if (bestIdx === -1) break;

        const best = pool[bestIdx];
        selected.push(best);
        ids.add(best._id);

        // Fold the new pick into every remaining candidate's diversity penalty.
        const vBest = getVector(best);
        if (!vBest?.length) continue;

        for (let i = 0; i < pool.length; i++) {
            const c = pool[i];
            if (ids.has(c._id)) continue;

            const vC = getVector(c);
            if (!vC?.length) continue;

            const sim = cosineSimilarity(vC, vBest);
            if (sim > maxSim[i]) maxSim[i] = sim;
        }
    }

    return selected;
}
// ═══════════════════════════════════════════════════════════════════════════
// L0 retrieval: no result cap, threshold filtering only
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Scores every stored state vector of the current chat against the query vector and
 * keeps those at or above CONFIG.L0_MIN_SIMILARITY, best first.
 *
 * Returns an empty result when there is no chat id or query vector, when the stored
 * fingerprint differs from the current engine fingerprint, or when no state vectors
 * exist. State vectors whose atom is not present in getStateAtoms() are ignored.
 *
 * @param {ArrayLike<number>} queryVector
 * @param {object} vectorConfig - Passed to getEngineFingerprint.
 * @param {object} [metrics] - When given, `metrics.l0` receives match statistics.
 * @returns {Promise<{atoms: Array<{atomId: *, floor: *, similarity: number, atom: object}>, floors: Set}>}
 */
async function searchL0(queryVector, vectorConfig, metrics) {
    const emptyResult = () => ({ atoms: [], floors: new Set() });

    const { chatId } = getContext();
    const hasQuery = Boolean(queryVector?.length);
    if (!chatId || !hasQuery) {
        return emptyResult();
    }

    // Vectors produced under a different engine fingerprint are not used.
    const meta = await getMeta(chatId);
    const currentFingerprint = getEngineFingerprint(vectorConfig);
    if (meta.fingerprint && meta.fingerprint !== currentFingerprint) {
        xbLog.warn(MODULE_ID, 'L0 fingerprint 不匹配');
        return emptyResult();
    }

    const stateVectors = await getAllStateVectors(chatId);
    if (!stateVectors.length) {
        return emptyResult();
    }

    const atomById = new Map(getStateAtoms().map((atom) => [atom.atomId, atom]));

    // Threshold filter only - deliberately no hard cap on the number of matches.
    const matches = [];
    for (const stateVector of stateVectors) {
        const atom = atomById.get(stateVector.atomId);
        if (!atom) continue;

        const similarity = cosineSimilarity(queryVector, stateVector.vector);
        if (similarity >= CONFIG.L0_MIN_SIMILARITY) {
            matches.push({
                atomId: stateVector.atomId,
                floor: stateVector.floor,
                similarity,
                atom,
            });
        }
    }
    matches.sort((left, right) => right.similarity - left.similarity);

    const floors = new Set();
    for (const match of matches) {
        floors.add(match.floor);
    }

    if (metrics) {
        const topAtoms = matches.slice(0, 5).map((match) => ({
            floor: match.floor,
            semantic: match.atom?.semantic?.slice(0, 50),
            similarity: Math.round(match.similarity * 1000) / 1000,
        }));
        metrics.l0.atomsMatched = matches.length;
        metrics.l0.floorsHit = floors.size;
        metrics.l0.topAtoms = topAtoms;
    }

    return { atoms: matches, floors };
}

// ═══════════════════════════════════════════════════════════════════════════
// Chunk type breakdown
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Counts chunks by origin: those flagged `isL0` versus all others.
 *
 * @param {Array<{isL0?: boolean}>|null|undefined} chunks
 * @returns {{l0Virtual: number, l1Real: number}}
 */
function countChunksByType(chunks) {
    const tally = { l0Virtual: 0, l1Real: 0 };
    for (const chunk of chunks || []) {
        tally[chunk.isL0 ? 'l0Virtual' : 'l1Real'] += 1;
    }
    return tally;
}
// ═══════════════════════════════════════════════════════════════════════════
// L3 fetch + L1 coarse filter + Rerank
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Builds the chunk set for the floors hit by L0.
 *
 * Steps: (1) turn each L0 atom into a virtual chunk, (2) load the stored chunks of the
 * hit floors, (3) rank those by vector similarity and keep the top
 * CONFIG.L1_MAX_CANDIDATES (chunks without a stored vector are dropped), (4) merge,
 * (5) return as-is when the merged count is at most CONFIG.RERANK_THRESHOLD, otherwise
 * (6) rerank.
 *
 * Storage failures are logged and treated as "no data". A reranker failure (throw or
 * non-array result) likewise degrades instead of aborting recall: the merged chunks
 * are ordered by vector similarity and cut to CONFIG.RERANK_TOP_N.
 *
 * @param {Set} l0Floors - Floors hit by L0.
 * @param {Array<{atomId: *, floor: *, similarity: number, atom: object}>} l0Atoms
 * @param {ArrayLike<number>} queryVector
 * @param {string} queryText - Text query for the reranker.
 * @param {object} [metrics] - When given, `metrics.l3` / `metrics.timing` are updated.
 * @returns {Promise<object[]>} Selected chunks (virtual L0 chunks carry `isL0: true`).
 */
async function getChunksFromL0Floors(l0Floors, l0Atoms, queryVector, queryText, metrics) {
    const { chatId } = getContext();
    if (!chatId || !l0Floors.size) {
        return [];
    }

    const floorArray = Array.from(l0Floors);

    // 1. Build virtual chunks from the L0 atoms
    const l0VirtualChunks = (l0Atoms || []).map(a => ({
        chunkId: `state-${a.atomId}`,
        floor: a.floor,
        chunkIdx: -1,
        speaker: '📌',
        isUser: false,
        text: a.atom?.semantic || '',
        similarity: a.similarity,
        isL0: true,
        _atom: a.atom,
    }));

    // 2. Load the L1 chunks of the hit floors
    let dbChunks = [];
    try {
        dbChunks = await getChunksByFloors(chatId, floorArray);
    } catch (e) {
        xbLog.warn(MODULE_ID, '从 DB 拉取 chunks 失败', e);
    }

    // 3. L1 vector coarse filter
    let l1Filtered = [];
    if (dbChunks.length > 0 && queryVector?.length) {
        const chunkIds = dbChunks.map(c => c.chunkId);
        let chunkVectors = [];
        try {
            chunkVectors = await getChunkVectorsByIds(chatId, chunkIds);
        } catch (e) {
            xbLog.warn(MODULE_ID, 'L1 向量获取失败', e);
        }

        const vectorMap = new Map(chunkVectors.map(v => [v.chunkId, v.vector]));

        l1Filtered = dbChunks
            .map(c => {
                const vec = vectorMap.get(c.chunkId);
                if (!vec?.length) return null;
                return {
                    ...c,
                    isL0: false,
                    similarity: cosineSimilarity(queryVector, vec),
                };
            })
            .filter(Boolean)
            .sort((a, b) => b.similarity - a.similarity)
            .slice(0, CONFIG.L1_MAX_CANDIDATES);
    }

    // 4. Merge
    const allChunks = [...l0VirtualChunks, ...l1Filtered];

    if (metrics) {
        metrics.l3.floorsFromL0 = floorArray.length;
        metrics.l3.l1Total = dbChunks.length;
        metrics.l3.l1AfterCoarse = l1Filtered.length;
        metrics.l3.chunksInRange = l0VirtualChunks.length + l1Filtered.length;
        metrics.l3.chunksInRangeByType = {
            l0Virtual: l0VirtualChunks.length,
            l1Real: l1Filtered.length,
        };
    }

    // 5. Small sets skip the reranker entirely
    if (allChunks.length <= CONFIG.RERANK_THRESHOLD) {
        if (metrics) {
            metrics.l3.rerankApplied = false;
            metrics.l3.chunksSelected = allChunks.length;
            metrics.l3.chunksSelectedByType = countChunksByType(allChunks);
        }
        return allChunks;
    }

    // 6. Rerank
    const T_Rerank_Start = performance.now();
    let reranked = null;
    try {
        reranked = await rerankChunks(queryText, allChunks, {
            topN: CONFIG.RERANK_TOP_N,
            minScore: CONFIG.RERANK_MIN_SCORE,
        });
    } catch (e) {
        xbLog.warn(MODULE_ID, 'Rerank 失败,降级为向量相似度排序', e);
    }
    const rerankTime = Math.round(performance.now() - T_Rerank_Start);

    // Reranker unavailable: fall back to the vector ranking, same size budget.
    if (!Array.isArray(reranked)) {
        const fallback = [...allChunks]
            .sort((a, b) => (b.similarity ?? 0) - (a.similarity ?? 0))
            .slice(0, CONFIG.RERANK_TOP_N);

        if (metrics) {
            metrics.l3.rerankApplied = false;
            metrics.l3.chunksSelected = fallback.length;
            metrics.l3.chunksSelectedByType = countChunksByType(fallback);
            metrics.l3.rerankTime = rerankTime;
            metrics.timing.l3Rerank = rerankTime;
        }
        return fallback;
    }

    if (metrics) {
        metrics.l3.rerankApplied = true;
        metrics.l3.beforeRerank = allChunks.length;
        metrics.l3.afterRerank = reranked.length;
        metrics.l3.chunksSelected = reranked.length;
        metrics.l3.chunksSelectedByType = countChunksByType(reranked);
        metrics.l3.rerankTime = rerankTime;
        metrics.timing.l3Rerank = rerankTime;

        const scores = reranked.map(c => c._rerankScore || 0).filter(s => s > 0);
        if (scores.length > 0) {
            scores.sort((a, b) => a - b);
            metrics.l3.rerankScoreDistribution = {
                min: Number(scores[0].toFixed(3)),
                max: Number(scores[scores.length - 1].toFixed(3)),
                mean: Number((scores.reduce((a, b) => a + b, 0) / scores.length).toFixed(3)),
            };
        }
    }

    xbLog.info(MODULE_ID, `L3: ${dbChunks.length} L1 → ${l1Filtered.length} 粗筛 → ${reranked.length} Rerank (${rerankTime}ms)`);

    return reranked;
}

// ═══════════════════════════════════════════════════════════════════════════
// L2 retrieval
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Retrieves events by vector similarity with an entity bonus and MMR diversification.
 *
 * Each event scores its cosine similarity to the query plus 0.05 when one of its
 * participants is a focus entity. Events at or above CONFIG.L2_MIN_SIMILARITY are
 * capped at CONFIG.L2_CANDIDATE_MAX. When at least one non-blank focus entity exists,
 * only candidates that match an entity or score >= 0.85 survive. mmrSelect then picks
 * up to CONFIG.L2_SELECT_MAX.
 *
 * Blank focus entities are ignored: they would otherwise switch on the entity filter
 * (discarding every candidate below 0.85) and match events with blank participants.
 *
 * @param {ArrayLike<number>} queryVector
 * @param {Array<{id: *, participants?: string[]}>} allEvents
 * @param {object} vectorConfig - Passed to getEngineFingerprint.
 * @param {string[]} focusEntities
 * @param {object} [metrics] - When given, `metrics.l2` is updated.
 * @returns {Promise<Array<{event: object, similarity: number, _recallType: string, _baseSim: number}>>}
 */
async function searchL2Events(queryVector, allEvents, vectorConfig, focusEntities, metrics) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length || !allEvents?.length) {
        return [];
    }

    const meta = await getMeta(chatId);
    const fp = getEngineFingerprint(vectorConfig);
    if (meta.fingerprint && meta.fingerprint !== fp) {
        xbLog.warn(MODULE_ID, 'L2 fingerprint 不匹配');
        return [];
    }

    const eventVectors = await getAllEventVectors(chatId);
    const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));

    if (!vectorMap.size) {
        return [];
    }

    // Drop entries that normalize to '' so they neither enable the filter nor match.
    const focusSet = new Set((focusEntities || []).map(normalize).filter(Boolean));

    const scored = allEvents.map(event => {
        const v = vectorMap.get(event.id);
        const baseSim = v ? cosineSimilarity(queryVector, v) : 0;

        const participants = (event.participants || []).map(p => normalize(p));
        const hasEntityMatch = participants.some(p => focusSet.has(p));

        const bonus = hasEntityMatch ? 0.05 : 0;

        return {
            _id: event.id,
            event,
            similarity: baseSim + bonus,
            _baseSim: baseSim,
            _hasEntityMatch: hasEntityMatch,
            vector: v,
        };
    });

    if (metrics) {
        metrics.l2.eventsInStore = allEvents.length;
    }

    let candidates = scored
        .filter(s => s.similarity >= CONFIG.L2_MIN_SIMILARITY)
        .sort((a, b) => b.similarity - a.similarity)
        .slice(0, CONFIG.L2_CANDIDATE_MAX);

    if (metrics) {
        metrics.l2.eventsConsidered = candidates.length;
    }

    if (focusSet.size > 0) {
        const beforeFilter = candidates.length;

        // Very similar events pass even without an entity match.
        candidates = candidates.filter(c => {
            if (c.similarity >= 0.85) return true;
            return c._hasEntityMatch;
        });

        if (metrics) {
            metrics.l2.entityFilterStats = {
                focusEntities: focusEntities || [],
                before: beforeFilter,
                after: candidates.length,
                filtered: beforeFilter - candidates.length,
            };
        }
    }

    const selected = mmrSelect(
        candidates,
        CONFIG.L2_SELECT_MAX,
        CONFIG.L2_MMR_LAMBDA,
        c => c.vector,
        c => c.similarity
    );

    let directCount = 0;
    let contextCount = 0;

    const results = selected.map(s => {
        const recallType = s._hasEntityMatch ? 'DIRECT' : 'SIMILAR';
        if (recallType === 'DIRECT') directCount++;
        else contextCount++;

        return {
            event: s.event,
            similarity: s.similarity,
            _recallType: recallType,
            _baseSim: s._baseSim,
        };
    });

    if (metrics) {
        metrics.l2.eventsSelected = results.length;
        metrics.l2.byRecallType = { direct: directCount, context: contextCount, causal: 0 };
        metrics.l2.similarityDistribution = calcSimilarityStats(results.map(r => r.similarity));
    }

    return results;
}

// ═══════════════════════════════════════════════════════════════════════════
// Causal chain tracing
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Indexes events by id. Entries without a truthy `id` are skipped.
 *
 * @param {Array<{id: *}>} allEvents
 * @returns {Map<*, object>}
 */
function buildEventIndex(allEvents) {
    const map = new Map();
    for (const e of allEvents || []) {
        if (e?.id) map.set(e.id, e);
    }
    return map;
}

/**
 * Walks the `causedBy` links of the recalled events and collects their ancestors.
 *
 * Only ids of the form `evt-<digits>` that exist in the index are followed, down to
 * `maxDepth` levels. Each ancestor records the smallest depth at which it was reached
 * and the recalled event ids (`chainFrom`) that lead to it. The result is ordered by
 * number of referencing recalled events (descending), then depth (ascending), and
 * capped at CONFIG.CAUSAL_INJECT_MAX.
 *
 * @param {Array<{event: {id: *, causedBy?: Array}}>} recalledEvents
 * @param {Map<*, object>} eventIndex - As built by buildEventIndex.
 * @param {number} [maxDepth=CONFIG.CAUSAL_CHAIN_MAX_DEPTH]
 * @returns {{results: Array<{event: object, depth: number, chainFrom: Array}>, maxDepth: number}}
 *   `maxDepth` is the deepest level actually visited.
 */
function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) {
    const out = new Map();
    const idRe = /^evt-\d+$/;
    let maxActualDepth = 0;

    function visit(parentId, depth, chainFrom) {
        if (depth > maxDepth) return;
        if (!idRe.test(parentId)) return;

        const ev = eventIndex.get(parentId);
        if (!ev) return;

        if (depth > maxActualDepth) maxActualDepth = depth;

        const existed = out.get(parentId);
        if (!existed) {
            out.set(parentId, { event: ev, depth, chainFrom: [chainFrom] });
        } else {
            if (depth < existed.depth) existed.depth = depth;
            if (!existed.chainFrom.includes(chainFrom)) existed.chainFrom.push(chainFrom);
        }

        // The depth limit above is what bounds this recursion, including on cyclic links.
        for (const next of (ev.causedBy || [])) {
            visit(String(next || '').trim(), depth + 1, chainFrom);
        }
    }

    for (const r of recalledEvents || []) {
        const rid = r?.event?.id;
        if (!rid) continue;
        for (const cid of (r.event?.causedBy || [])) {
            visit(String(cid || '').trim(), 1, rid);
        }
    }

    const results = Array.from(out.values())
        .sort((a, b) => {
            const refDiff = b.chainFrom.length - a.chainFrom.length;
            if (refDiff !== 0) return refDiff;
            return a.depth - b.depth;
        })
        .slice(0, CONFIG.CAUSAL_INJECT_MAX);

    return { results, maxDepth: maxActualDepth };
}
// ═══════════════════════════════════════════════════════════════════════════
// Helpers
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Returns the last `count` messages of a chat as a new array.
 *
 * @param {Array<{is_user?: boolean}>} chat
 * @param {number} [count=4]
 * @param {boolean} [excludeLastAi=false] - When true and the final message is not from
 *   the user, that message is left out before taking the tail.
 * @returns {object[]}
 */
function getLastMessages(chat, count = 4, excludeLastAi = false) {
    if (!chat?.length) return [];

    let messages = [...chat];

    if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) {
        messages = messages.slice(0, -1);
    }

    return messages.slice(-count);
}

/**
 * Builds a plain-text query from the tail of a chat, one `speaker: text` line per
 * message (text capped at 500 characters).
 *
 * Messages whose cleaned text is empty - or that have no `mes` at all - are skipped
 * rather than contributing a bare `speaker: ` line.
 *
 * @param {Array<{mes?: string, name?: string, is_user?: boolean}>} chat
 * @param {number} [count=2]
 * @param {boolean} [excludeLastAi=false] - Same meaning as in getLastMessages.
 * @returns {string}
 */
export function buildQueryText(chat, count = 2, excludeLastAi = false) {
    if (!chat?.length) return '';

    let messages = chat;
    if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) {
        messages = messages.slice(0, -1);
    }

    return messages.slice(-count).map(m => {
        const text = cleanForRecall(m?.mes || '');
        if (!text) return '';
        const speaker = m.name || (m.is_user ? '用户' : '角色');
        return `${speaker}: ${text.slice(0, 500)}`;
    }).filter(Boolean).join('\n');
}

// ═══════════════════════════════════════════════════════════════════════════
// Main entry point
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Runs the full recall pipeline: query expansion, query embedding, L0 atom search,
 * chunk retrieval (L3 fetch + L1 coarse filter + rerank), L2 event search and causal
 * chain tracing.
 *
 * Query expansion is best-effort: on failure, or when it yields a missing or malformed
 * result, recall continues with empty `focus` / `queries` and falls back to
 * `queryText` or the recent messages as search text.
 *
 * @param {string} queryText - Fallback search text when expansion yields none.
 * @param {object[]} allEvents - All known events; recall is skipped when empty.
 * @param {object} vectorConfig - Embedding configuration.
 * @param {{pendingUserMessage?: string|null, excludeLastAi?: boolean}} [options]
 * @returns {Promise<object>} `{ events, causalEvents, chunks, expansion, focusEntities,
 *   elapsed, metrics }`; the early-exit paths return empty lists plus a `logText`.
 */
export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) {
    const T0 = performance.now();
    const { chat, name1 } = getContext();
    const { pendingUserMessage = null, excludeLastAi = false } = options;

    const metrics = createMetrics();

    if (!allEvents?.length) {
        metrics.l0.needRecall = false;
        return { events: [], chunks: [], causalEvents: [], focusEntities: [], elapsed: 0, logText: 'No events.', metrics };
    }

    // ═══════════════════════════════════════════════════════════════════════
    // Step 1: Query Expansion
    // ═══════════════════════════════════════════════════════════════════════

    const T_QE_Start = performance.now();

    const lastMessages = getLastMessages(chat, 4, excludeLastAi);

    let expansion = { focus: [], queries: [] };
    try {
        const rawExpansion = await expandQueryCached(lastMessages, {
            pendingUserMessage,
            timeout: CONFIG.QUERY_EXPANSION_TIMEOUT,
        });
        // Validate before assigning: everything below reads expansion.focus and
        // expansion.queries as arrays, outside of this try block.
        expansion = {
            ...rawExpansion,
            focus: Array.isArray(rawExpansion?.focus) ? rawExpansion.focus : [],
            queries: Array.isArray(rawExpansion?.queries) ? rawExpansion.queries : [],
        };
        xbLog.info(MODULE_ID, `Query Expansion: focus=[${expansion.focus.join(',')}] queries=${expansion.queries.length}`);
    } catch (e) {
        xbLog.warn(MODULE_ID, 'Query Expansion 失败,降级使用原始文本', e);
    }

    const searchText = buildSearchText(expansion);
    const finalSearchText = searchText || queryText || lastMessages.map(m => cleanForRecall(m.mes || '').slice(0, 200)).join(' ');

    const focusEntities = removeUserNameFromFocus(expansion.focus, name1);

    metrics.l0.needRecall = true;
    metrics.l0.focusEntities = focusEntities;
    metrics.l0.queries = expansion.queries || [];
    metrics.l0.queryExpansionTime = Math.round(performance.now() - T_QE_Start);
    metrics.timing.queryExpansion = metrics.l0.queryExpansionTime;

    // ═══════════════════════════════════════════════════════════════════════
    // Step 2: Embed the query
    // ═══════════════════════════════════════════════════════════════════════

    let queryVector;
    try {
        const [vec] = await embed([finalSearchText], vectorConfig, { timeout: 10000 });
        queryVector = vec;
    } catch (e) {
        xbLog.error(MODULE_ID, '向量化失败', e);
        metrics.timing.total = Math.round(performance.now() - T0);
        return { events: [], chunks: [], causalEvents: [], focusEntities, elapsed: metrics.timing.total, logText: 'Embedding failed.', metrics };
    }

    if (!queryVector?.length) {
        metrics.timing.total = Math.round(performance.now() - T0);
        return { events: [], chunks: [], causalEvents: [], focusEntities, elapsed: metrics.timing.total, logText: 'Empty query vector.', metrics };
    }

    // ═══════════════════════════════════════════════════════════════════════
    // Step 3: L0 retrieval
    // ═══════════════════════════════════════════════════════════════════════

    const T_L0_Start = performance.now();

    const { atoms: l0Atoms, floors: l0Floors } = await searchL0(queryVector, vectorConfig, metrics);

    metrics.timing.l0Search = Math.round(performance.now() - T_L0_Start);

    // ═══════════════════════════════════════════════════════════════════════
    // Step 4: L3 fetch + L1 coarse filter + Rerank
    // ═══════════════════════════════════════════════════════════════════════

    const T_L3_Start = performance.now();

    const rerankQuery = buildRerankQuery(expansion, lastMessages, pendingUserMessage);
    const chunks = await getChunksFromL0Floors(l0Floors, l0Atoms, queryVector, rerankQuery, metrics);

    metrics.timing.l3Retrieval = Math.round(performance.now() - T_L3_Start);

    // ═══════════════════════════════════════════════════════════════════════
    // Step 5: L2 retrieval (independent of the L0/L3 results)
    // ═══════════════════════════════════════════════════════════════════════

    const T_L2_Start = performance.now();

    const eventResults = await searchL2Events(queryVector, allEvents, vectorConfig, focusEntities, metrics);

    metrics.timing.l2Retrieval = Math.round(performance.now() - T_L2_Start);

    // ═══════════════════════════════════════════════════════════════════════
    // Step 6: Causal chain tracing
    // ═══════════════════════════════════════════════════════════════════════

    const eventIndex = buildEventIndex(allEvents);
    const { results: causalMap, maxDepth: causalMaxDepth } = traceCausalAncestors(eventResults, eventIndex);

    // Ancestors that were already recalled directly are not repeated as causal events.
    const recalledIdSet = new Set(eventResults.map(x => x?.event?.id).filter(Boolean));
    const causalEvents = causalMap
        .filter(x => x?.event?.id && !recalledIdSet.has(x.event.id))
        .map(x => ({
            event: x.event,
            similarity: 0,
            _recallType: 'CAUSAL',
            _causalDepth: x.depth,
            chainFrom: x.chainFrom,
        }));

    if (metrics.l2.byRecallType) {
        metrics.l2.byRecallType.causal = causalEvents.length;
    }
    metrics.l2.causalChainDepth = causalMaxDepth;
    metrics.l2.causalEventsCount = causalEvents.length;

    // ═══════════════════════════════════════════════════════════════════════
    // Done
    // ═══════════════════════════════════════════════════════════════════════

    metrics.timing.total = Math.round(performance.now() - T0);
    metrics.l2.entityNames = focusEntities;
    metrics.l2.entitiesLoaded = focusEntities.length;

    console.group('%c[Recall v4]', 'color: #7c3aed; font-weight: bold');
    console.log(`Elapsed: ${metrics.timing.total}ms`);
    console.log(`Query Expansion: focus=[${expansion.focus.join(', ')}]`);
    console.log(`L0: ${l0Atoms.length} atoms → ${l0Floors.size} floors`);
    console.log(`L3: ${metrics.l3.l1Total || 0} L1 → ${metrics.l3.l1AfterCoarse || 0} 粗筛 → ${chunks.length} final`);
    if (metrics.l3.rerankApplied) {
        console.log(`L3 Rerank: ${metrics.l3.beforeRerank} → ${metrics.l3.afterRerank} (${metrics.l3.rerankTime}ms)`);
    }
    console.log(`L2: ${eventResults.length} events, ${causalEvents.length} causal`);
    console.groupEnd();

    return {
        events: eventResults,
        causalEvents,
        chunks,
        expansion,
        focusEntities,
        elapsed: metrics.timing.total,
        metrics,
    };
}