// Story Summary - Recall Engine
// L1 chunk + L2 event recall
// - full-scan vector scoring
// - exponentially decayed, weighted query embedding
// - entity / participant bonus
// - MMR de-duplication
// - per-floor sparse de-duplication

import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js';
|
||
import { embed, getEngineFingerprint } from './embedder.js';
|
||
import { xbLog } from '../../../core/debug-core.js';
|
||
import { getContext } from '../../../../../../extensions.js';
|
||
import { getSummaryStore } from '../data/store.js';
|
||
import { filterText } from './text-filter.js';
|
||
|
||
// Module tag passed to xbLog when reporting errors from this file.
const MODULE_ID = 'recall';

/**
 * Tunable parameters of the recall pipeline.
 */
const CONFIG = {
    // Query construction: how many recent messages are embedded, the decay
    // factor of their weights, and the per-message character limits
    // (last message vs. earlier context messages).
    QUERY_MSG_COUNT: 5,
    QUERY_DECAY_BETA: 0.7,
    QUERY_MAX_CHARS: 600,
    QUERY_CONTEXT_CHARS: 240,

    // Causal chain
    CAUSAL_CHAIN_MAX_DEPTH: 10, // generous hop limit; let the graph terminate naturally
    CAUSAL_INJECT_MAX: 30, // generous cap; the prompt token budget is the final limiter

    // Size of the candidate pools handed to MMR.
    CANDIDATE_CHUNKS: 200,
    CANDIDATE_EVENTS: 150,

    // Upper bound on how many items MMR may select.
    MAX_CHUNKS: 40,
    MAX_EVENTS: 120,

    // Minimum score a candidate needs to enter the pool, and the MMR
    // relevance/diversity trade-off.
    MIN_SIMILARITY_CHUNK: 0.6,
    MIN_SIMILARITY_EVENT: 0.65,
    MMR_LAMBDA: 0.72,

    // Additive bonuses applied to an event's similarity.
    BONUS_PARTICIPANT_HIT: 0.08,
    BONUS_TEXT_HIT: 0.05,
    BONUS_WORLD_TOPIC_HIT: 0.06,

    FLOOR_LIMIT: 1,
};
|
||
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
// 工具函数
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
/**
 * Cosine similarity of two numeric vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} The cosine of the angle between `a` and `b`, or 0 when
 *   either vector is missing/empty, the lengths differ, or a norm is zero.
 */
function cosineSimilarity(a, b) {
    if (!a?.length || !b?.length || a.length !== b.length) return 0;

    let dot = 0;
    let normSqA = 0;
    let normSqB = 0;
    for (let i = 0; i < a.length; i++) {
        const x = a[i];
        const y = b[i];
        dot += x * y;
        normSqA += x * x;
        normSqB += y * y;
    }

    // A zero (or NaN) norm has no direction; report "no similarity".
    if (!normSqA || !normSqB) return 0;
    return dot / (Math.sqrt(normSqA) * Math.sqrt(normSqB));
}
|
||
|
||
/**
 * Scale a vector to unit length.
 *
 * @param {number[]} v
 * @returns {number[]} A new array `v / |v|`; a zero vector is returned
 *   unchanged (divided by 1) rather than producing NaN.
 */
function normalizeVec(v) {
    let sumSq = 0;
    for (let i = 0; i < v.length; i++) sumSq += v[i] * v[i];
    const norm = Math.sqrt(sumSq) || 1;
    return v.map((x) => x / norm);
}
|
||
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
// 因果链追溯(Graph-augmented retrieval)
|
||
// - 从已召回事件出发,沿 causedBy 向上追溯祖先事件
|
||
// - 记录边:chainFrom = 哪个召回事件需要它
|
||
// - 不在这里决定“是否额外注入”,只负责遍历与结构化结果
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
/**
 * Index events by id for O(1) lookup.
 *
 * @param {Array<object>|null|undefined} allEvents
 * @returns {Map<*, object>} id -> event; entries without a truthy `id` are
 *   skipped, and a later duplicate id overwrites an earlier one.
 */
function buildEventIndex(allEvents) {
    const index = new Map();
    for (const ev of allEvents || []) {
        if (ev?.id) index.set(ev.id, ev);
    }
    return index;
}
|
||
|
||
/**
 * Walk `causedBy` links upward from each recalled event and collect the
 * ancestor events that are reachable within `maxDepth` hops.
 *
 * This function only traverses and structures the result; it does not decide
 * which ancestors are injected.
 *
 * @param {Array<{event: object}>} recalledEvents - Recalled items; each
 *   `event.id` is a chain source and `event.causedBy` lists parent ids.
 * @param {Map<string, object>} eventIndex - id -> event lookup.
 * @param {number} [maxDepth] - Maximum number of hops (inclusive).
 * @returns {Map<string, {event: object, depth: number, chainFrom: Array}>}
 *   ancestor id -> the event, the smallest depth at which it was reached,
 *   and the recalled event ids whose chains reach it.
 */
function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) {
    const out = new Map();
    const idRe = /^evt-\d+$/;

    /**
     * @param {string} parentId - Candidate ancestor id.
     * @param {number} depth - Hop count from the chain source.
     * @param {*} chainFrom - Id of the recalled event that started this chain.
     * @param {Map<string, number>} expandedAt - For the current source only:
     *   the shallowest depth at which each ancestor was already expanded.
     */
    function visit(parentId, depth, chainFrom, expandedAt) {
        if (depth > maxDepth) return;
        if (!idRe.test(parentId)) return;

        const ev = eventIndex.get(parentId);
        if (!ev) return;

        // Re-expanding an ancestor at the same or a deeper depth for the same
        // source cannot lower any depth or add any source, so skip it. Without
        // this, diamond-shaped graphs are walked once per path (exponential),
        // and cycles spin until maxDepth.
        const seenDepth = expandedAt.get(parentId);
        if (seenDepth !== undefined && seenDepth <= depth) return;
        expandedAt.set(parentId, depth);

        // An ancestor shared by several chains keeps its smallest depth and
        // accumulates every source that reaches it.
        const existed = out.get(parentId);
        if (!existed) {
            out.set(parentId, { event: ev, depth, chainFrom: [chainFrom] });
        } else {
            if (depth < existed.depth) existed.depth = depth;
            if (!existed.chainFrom.includes(chainFrom)) existed.chainFrom.push(chainFrom);
        }

        for (const next of (ev.causedBy || [])) {
            visit(String(next || '').trim(), depth + 1, chainFrom, expandedAt);
        }
    }

    for (const r of recalledEvents || []) {
        const rid = r?.event?.id;
        if (!rid) continue;

        const expandedAt = new Map();
        for (const cid of (r.event?.causedBy || [])) {
            visit(String(cid || '').trim(), 1, rid, expandedAt);
        }
    }

    return out;
}
|
||
|
||
/**
 * Sort causal events in place: reference count > depth > event number.
 *
 * @param {Array<{event: {id: *}, depth: number, chainFrom: Array}>} causalArray
 * @returns {Array} The same array instance, sorted.
 */
function sortCausalEvents(causalArray) {
    // Numeric suffix of an id such as "evt-12"; NaN when there is none.
    const ordinalOf = (id) => {
        const m = /(\d+)$/.exec(String(id));
        return m ? Number(m[1]) : Number.NaN;
    };

    return causalArray.sort((a, b) => {
        // 1. Events referenced by more recalled chains come first.
        const refDiff = b.chainFrom.length - a.chainFrom.length;
        if (refDiff !== 0) return refDiff;

        // 2. Shallower (closer) ancestors come first.
        const depthDiff = a.depth - b.depth;
        if (depthDiff !== 0) return depthDiff;

        // 3. Event number, compared numerically so that evt-2 precedes evt-10
        //    (a plain string comparison orders them the other way round).
        const numA = ordinalOf(a.event.id);
        const numB = ordinalOf(b.event.id);
        if (Number.isFinite(numA) && Number.isFinite(numB) && numA !== numB) {
            return numA - numB;
        }
        return String(a.event.id).localeCompare(String(b.event.id));
    });
}
|
||
|
||
/**
 * Canonicalize a string for comparison: NFKC-normalize, strip zero-width
 * characters (U+200B..U+200D, U+FEFF) and trim.
 *
 * @param {*} s - Any value; falsy values become the empty string.
 * @returns {string}
 */
function normalize(s) {
    return String(s || '')
        .normalize('NFKC')
        .replace(/[\u200B-\u200D\uFEFF]/g, '')
        .trim();
}
|
||
|
||
/**
 * Prepare a message body for recall.
 *
 * @param {string} text - Raw message text.
 * @returns {string} The text after the user-defined filter rules
 *   (`filterText`) with hard-coded `[tts:...]` markers removed, trimmed.
 */
function cleanForRecall(text) {
    // 1. Apply the user-defined filter rules.
    const filtered = filterText(text);
    // 2. Remove TTS markers (hard-coded).
    return filtered.replace(/\[tts:[^\]]*\]/gi, '').trim();
}
|
||
|
||
/**
 * Exponentially decaying weights that sum to 1.
 *
 * Item `i` receives a raw weight of `exp(beta * (i - (n - 1)))`, so the last
 * item has the largest weight and each step back multiplies it by `exp(-beta)`.
 *
 * @param {number} n - Number of weights.
 * @param {number} beta - Decay rate.
 * @returns {number[]} `n` weights, oldest first; empty when `n` is 0.
 */
function buildExpDecayWeights(n, beta) {
    const lastIdx = n - 1;
    const raw = Array.from({ length: n }, (_, i) => Math.exp(beta * (i - lastIdx)));
    const total = raw.reduce((acc, x) => acc + x, 0) || 1;
    return raw.map((x) => x / total);
}
|
||
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
// Query 构建
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
/**
 * Build the "speaker: text" segments that are embedded as the recall query.
 *
 * @param {Array<{is_user?: boolean, name?: string, mes?: string}>} chat
 * @param {number} count - How many trailing messages to use.
 * @param {boolean} excludeLastAi - Drop the last message when it is not a
 *   user message.
 * @param {string|null} [pendingUserMessage] - A user message that may not be
 *   in `chat` yet; appended as a virtual last message unless the last message
 *   already has the same trimmed text.
 * @returns {string[]} Oldest-first segments; messages that are empty after
 *   cleaning are omitted. The last selected message is cut at
 *   CONFIG.QUERY_MAX_CHARS, the others at CONFIG.QUERY_CONTEXT_CHARS.
 */
function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = null) {
    if (!chat?.length) return [];

    let messages = chat;
    if (excludeLastAi && !messages[messages.length - 1]?.is_user) {
        messages = messages.slice(0, -1);
    }

    // Append the pending user message as a virtual message. A blank or
    // non-string value is ignored so it cannot push a real message out of the
    // `count` window (and cannot throw on `.trim()`).
    const pendingText = typeof pendingUserMessage === 'string' ? pendingUserMessage.trim() : '';
    if (pendingText) {
        const lastMsgText = messages[messages.length - 1]?.mes?.trim() || '';
        // Avoid duplication when chat already ends with this message.
        if (lastMsgText !== pendingText) {
            messages = [...messages, { is_user: true, name: '用户', mes: pendingUserMessage }];
        }
    }

    const recent = messages.slice(-count);
    const segments = [];
    recent.forEach((m, idx) => {
        const clean = cleanForRecall(m.mes);
        if (!clean) return;

        const speaker = m.name || (m.is_user ? '用户' : '角色');
        const isLast = idx === recent.length - 1;
        const limit = isLast ? CONFIG.QUERY_MAX_CHARS : CONFIG.QUERY_CONTEXT_CHARS;
        segments.push(`${speaker}: ${clean.slice(0, limit)}`);
    });
    return segments;
}
|
||
|
||
/**
 * Embed every segment and blend the vectors into one unit-length query
 * vector, using exponentially decaying weights (latest segment heaviest).
 *
 * @param {string[]} segments - Oldest-first query segments.
 * @param {object} vectorConfig - Passed through to `embed`.
 * @returns {Promise<{vector: number[], weights: number[]}|null>} `null` when
 *   there are no segments or the first returned vector is empty.
 */
async function embedWeightedQuery(segments, vectorConfig) {
    if (!segments?.length) return null;

    const weights = buildExpDecayWeights(segments.length, CONFIG.QUERY_DECAY_BETA);
    const vecs = await embed(segments, vectorConfig);
    const dims = vecs?.[0]?.length || 0;
    if (!dims) return null;

    const blended = new Array(dims).fill(0);
    // Only pair up vectors that have a weight; a missing vector is skipped
    // instead of crashing (the result is re-normalized below anyway).
    const pairCount = Math.min(vecs.length, weights.length);
    for (let i = 0; i < pairCount; i++) {
        const vec = vecs[i];
        if (!vec) continue;
        const weight = weights[i];
        for (let j = 0; j < dims; j++) {
            blended[j] += (vec[j] || 0) * weight;
        }
    }

    return { vector: normalizeVec(blended), weights };
}
|
||
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
// 实体抽取
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
/**
 * Collect the known entity names that can be looked for in query text.
 *
 * Sources, in order: event participants, main characters, arc names, world
 * topics (those without "::"), and both ends of character relationships.
 *
 * @param {{json?: object}|null|undefined} store - Summary store.
 * @param {Array<object>|null|undefined} allEvents
 * @returns {string[]} Up to 5000 normalized names of length >= 2, excluding
 *   the user's name, a fixed list of pronouns/role words, strings made only
 *   of whitespace/punctuation/symbols, and strings containing an HTML-like tag.
 */
function buildEntityLexicon(store, allEvents) {
    const { name1 } = getContext();
    const userName = normalize(name1);
    const names = new Set();

    const addName = (value) => {
        const s = normalize(value);
        if (s) names.add(s);
    };

    for (const e of allEvents || []) {
        // Null entries are tolerated, matching buildEventIndex.
        for (const p of e?.participants || []) addName(p);
    }

    const json = store?.json || {};

    for (const m of json.characters?.main || []) {
        addName(typeof m === 'string' ? m : m?.name);
    }

    for (const a of json.arcs || []) addName(a?.name);

    for (const w of json.world || []) {
        const topic = normalize(w?.topic);
        if (topic && !topic.includes('::')) names.add(topic);
    }

    for (const r of json.characters?.relationships || []) {
        addName(r?.from);
        addName(r?.to);
    }

    const stop = new Set(
        [userName, '我', '你', '他', '她', '它', '用户', '角色', 'assistant'].map(normalize).filter(Boolean),
    );

    const onlyPunctuation = /^[\s\p{P}\p{S}]+$/u;
    const hasTag = /<[^>]+>/;

    return Array.from(names)
        .filter((s) => s.length >= 2 && !stop.has(s) && !onlyPunctuation.test(s) && !hasTag.test(s))
        .slice(0, 5000);
}
|
||
|
||
/**
 * Find which lexicon entries occur in a text.
 *
 * @param {string} text - Text to scan; normalized before matching.
 * @param {string[]} lexicon - Known entity names.
 * @returns {string[]} At most 20 entries that occur as substrings, checked
 *   longest entry first.
 */
function extractEntities(text, lexicon) {
    const haystack = normalize(text);
    if (!haystack || !lexicon?.length) return [];

    const byLengthDesc = [...lexicon].sort((a, b) => b.length - a.length);
    const hits = [];
    for (const entry of byLengthDesc) {
        if (haystack.includes(entry)) hits.push(entry);
        if (hits.length >= 20) break;
    }
    return hits;
}
|
||
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
// MMR
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
/**
 * Greedy Maximal Marginal Relevance selection.
 *
 * Each round picks the candidate maximizing
 * `lambda * score - (1 - lambda) * maxSim`, where `maxSim` is the highest
 * cosine similarity (floored at 0) to any item already selected. Candidates
 * whose `_id` was already selected are skipped. Ties keep the earlier
 * candidate.
 *
 * @param {Array<{_id: *}>} candidates
 * @param {number} k - Maximum number of items to select.
 * @param {number} lambda - Relevance weight; (1 - lambda) weighs diversity.
 * @param {(c: object) => ArrayLike<number>|null|undefined} getVector
 * @param {(c: object) => number} getScore
 * @returns {Array<object>} Selected candidates in selection order.
 */
function mmrSelect(candidates, k, lambda, getVector, getScore) {
    const selected = [];
    const pickedIds = new Set();

    // maxSim[i]: highest similarity between candidate i and anything selected
    // so far. Maintained incrementally so each round costs one similarity per
    // remaining candidate instead of one per (candidate, selected) pair.
    const maxSim = Array.from({ length: candidates.length }, () => 0);

    while (selected.length < k && candidates.length) {
        let bestIdx = -1;
        let bestScore = -Infinity;

        for (let i = 0; i < candidates.length; i++) {
            const c = candidates[i];
            if (pickedIds.has(c._id)) continue;

            const score = lambda * getScore(c) - (1 - lambda) * maxSim[i];
            if (score > bestScore) {
                bestScore = score;
                bestIdx = i;
            }
        }

        if (bestIdx < 0) break;

        const best = candidates[bestIdx];
        selected.push(best);
        pickedIds.add(best._id);
        if (selected.length >= k) break;

        // Fold the newly selected item into every remaining candidate's maxSim.
        const bestVec = getVector(best);
        for (let i = 0; i < candidates.length; i++) {
            const c = candidates[i];
            if (pickedIds.has(c._id)) continue;

            const vec = getVector(c);
            if (!vec?.length) continue;

            const sim = cosineSimilarity(vec, bestVec);
            if (sim > maxSim[i]) maxSim[i] = sim;
        }
    }

    return selected;
}
|
||
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
// L1 Chunks 检索
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
/**
 * L1 retrieval: score every stored chunk vector against the query, keep the
 * best candidates, diversify with MMR, keep one chunk per floor, then load
 * the chunk bodies.
 *
 * @param {number[]} queryVector
 * @param {object} vectorConfig - Used to compute the current engine fingerprint.
 * @returns {Promise<Array<{chunkId: *, floor: number, chunkIdx: number,
 *   speaker: *, isUser: *, text: *, similarity: number}>>} Results by
 *   descending similarity. When non-empty, the array also carries a
 *   `_preFilterStats` property for logging. Empty when there is no chat id,
 *   no query vector, no stored vectors, or the stored fingerprint differs
 *   from the current one.
 */
async function searchChunks(queryVector, vectorConfig) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length) return [];

    // Bail out when the stored fingerprint differs from the current engine's.
    const meta = await getMeta(chatId);
    const fp = getEngineFingerprint(vectorConfig);
    if (meta?.fingerprint && meta.fingerprint !== fp) return [];

    const chunkVectors = await getAllChunkVectors(chatId);
    if (!chunkVectors?.length) return [];

    const scored = chunkVectors.map((cv) => {
        // Chunk ids embed "c-<floor>-<index>"; unparseable ids fall back to 0/0.
        const match = String(cv.chunkId).match(/c-(\d+)-(\d+)/);
        return {
            _id: cv.chunkId,
            chunkId: cv.chunkId,
            floor: match ? Number.parseInt(match[1], 10) : 0,
            chunkIdx: match ? Number.parseInt(match[2], 10) : 0,
            similarity: cosineSimilarity(queryVector, cv.vector),
            vector: cv.vector,
        };
    });

    // Pre-filter stats for logging, gathered in a single pass.
    const distribution = { '0.8+': 0, '0.7-0.8': 0, '0.6-0.7': 0, '0.55-0.6': 0, '<0.55': 0 };
    let passThreshold = 0;
    for (const { similarity } of scored) {
        if (similarity >= CONFIG.MIN_SIMILARITY_CHUNK) passThreshold++;

        if (similarity >= 0.8) distribution['0.8+']++;
        else if (similarity >= 0.7) distribution['0.7-0.8']++;
        else if (similarity >= 0.6) distribution['0.6-0.7']++;
        else if (similarity >= 0.55) distribution['0.55-0.6']++;
        else if (similarity < 0.55) distribution['<0.55']++;
    }
    const preFilterStats = {
        total: scored.length,
        passThreshold,
        threshold: CONFIG.MIN_SIMILARITY_CHUNK,
        distribution,
    };

    const candidates = scored
        .filter((s) => s.similarity >= CONFIG.MIN_SIMILARITY_CHUNK)
        .sort((a, b) => b.similarity - a.similarity)
        .slice(0, CONFIG.CANDIDATE_CHUNKS);

    // Dynamic K: take fewer when not enough candidates are good enough.
    const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length);

    const selected = mmrSelect(
        candidates,
        dynamicK,
        CONFIG.MMR_LAMBDA,
        (c) => c.vector,
        (c) => c.similarity,
    );

    // Per-floor sparse de-duplication: keep only the most similar chunk of
    // each floor.
    const bestByFloor = new Map();
    for (const s of selected) {
        const prev = bestByFloor.get(s.floor);
        if (!prev || s.similarity > prev.similarity) {
            bestByFloor.set(s.floor, s);
        }
    }

    // Final order: descending similarity.
    const sparse = Array.from(bestByFloor.values()).sort((a, b) => b.similarity - a.similarity);

    const floors = [...new Set(sparse.map((c) => c.floor))];
    const chunks = await getChunksByFloors(chatId, floors);
    const chunkMap = new Map(chunks.map((c) => [c.chunkId, c]));

    const results = [];
    for (const item of sparse) {
        const chunk = chunkMap.get(item.chunkId);
        if (!chunk) continue;
        results.push({
            chunkId: item.chunkId,
            floor: item.floor,
            chunkIdx: item.chunkIdx,
            speaker: chunk.speaker,
            isUser: chunk.isUser,
            text: chunk.text,
            similarity: item.similarity,
        });
    }

    // Attach stats for logging.
    if (results.length > 0) {
        results._preFilterStats = preFilterStats;
    }

    return results;
}
|
||
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
// L2 Events 检索
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
/**
 * L2 retrieval: score every event against the query vector, add entity-based
 * bonuses, filter by the final score, diversify with MMR.
 *
 * Bonuses (each applied at most once per event):
 * - participant: a participant other than the user is one of the query entities;
 * - text: a query entity occurs in the event's title/summary;
 * - world: a world topic of category inventory/rule/knowledge is a query
 *   entity and occurs in the event's title/summary.
 *
 * @param {number[]} queryVector
 * @param {Array<object>} allEvents
 * @param {object} vectorConfig - Used to compute the current engine fingerprint.
 * @param {{json?: object}} store - Summary store (source of world topics).
 * @param {string[]} queryEntities - Entities found in the query text.
 * @returns {Promise<Array<{event: object, similarity: number,
 *   _recallType: 'DIRECT'|'SIMILAR', _recallReason: string,
 *   _preFilterDistribution: object}>>} Selected events by descending final
 *   score; `similarity` is the final score (cosine + bonus).
 */
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities) {
    const { chatId, name1 } = getContext();
    if (!chatId || !queryVector?.length) {
        console.warn('[searchEvents] 早期返回: chatId或queryVector为空');
        return [];
    }

    const meta = await getMeta(chatId);
    const fp = getEngineFingerprint(vectorConfig);
    const storedFp = meta?.fingerprint;
    console.log('[searchEvents] fingerprint检查:', {
        metaFp: storedFp,
        currentFp: fp,
        match: storedFp === fp || !storedFp,
    });
    // Bail out when the stored fingerprint differs from the current engine's.
    if (storedFp && storedFp !== fp) return [];

    const eventVectors = await getAllEventVectors(chatId);
    const vectorMap = new Map(eventVectors.map((v) => [v.eventId, v.vector]));
    console.log('[searchEvents] 向量数据:', {
        eventVectorsCount: eventVectors.length,
        vectorMapSize: vectorMap.size,
        allEventsCount: allEvents?.length,
    });
    if (!vectorMap.size) return [];

    const userName = normalize(name1);

    // Normalize the query entities once instead of once per event.
    const normalizedEntities = (queryEntities || []).map(normalize);
    const querySet = new Set(normalizedEntities);
    // An entity that normalizes to '' would be a substring of every text, so
    // it must not take part in the text-hit check.
    const textProbes = normalizedEntities.filter(Boolean);

    // Only world topics of the hard-constraint categories are considered.
    const worldTopics = (store?.json?.world || [])
        .filter((w) => ['inventory', 'rule', 'knowledge'].includes(String(w?.category).toLowerCase()))
        .map((w) => normalize(w?.topic))
        .filter(Boolean);

    const scored = (allEvents || []).map((event, idx) => {
        const v = vectorMap.get(event.id);
        // Events without a stored vector score 0.
        const sim = v ? cosineSimilarity(queryVector, v) : 0;

        let bonus = 0;
        const reasons = [];

        // Participant hit.
        const participants = (event.participants || []).map(normalize).filter(Boolean);
        if (participants.some((p) => p !== userName && querySet.has(p))) {
            bonus += CONFIG.BONUS_PARTICIPANT_HIT;
            reasons.push('participant');
        }

        // Text hit.
        const text = normalize(`${event.title || ''} ${event.summary || ''}`);
        if (textProbes.some((entity) => text.includes(entity))) {
            bonus += CONFIG.BONUS_TEXT_HIT;
            reasons.push('text');
        }

        // World-topic hit.
        if (worldTopics.some((topic) => querySet.has(topic) && text.includes(topic))) {
            bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
            reasons.push('world');
        }

        return {
            _id: event.id,
            _idx: idx,
            event,
            similarity: sim,
            bonus,
            finalScore: sim + bonus,
            reasons,
            isDirect: reasons.includes('participant'),
            vector: v,
        };
    });

    // Similarity distribution (debug log).
    const simValues = scored.map((s) => s.similarity).sort((a, b) => b - a);
    const countSimAtLeast = (min) => scored.filter((s) => s.similarity >= min).length;
    console.log('[searchEvents] 相似度分布(前20):', simValues.slice(0, 20));
    console.log('[searchEvents] 相似度分布(后20):', simValues.slice(-20));
    console.log('[searchEvents] 有向量的事件数:', scored.filter((s) => s.similarity > 0).length);
    console.log('[searchEvents] sim >= 0.6:', countSimAtLeast(0.6));
    console.log('[searchEvents] sim >= 0.5:', countSimAtLeast(0.5));
    console.log('[searchEvents] sim >= 0.3:', countSimAtLeast(0.3));

    // Distribution before filtering, based on finalScore so that it matches
    // the value shown in the report.
    const countFinalIn = (lo, hi) => scored.filter((s) => s.finalScore >= lo && s.finalScore < hi).length;
    const preFilterDistribution = {
        total: scored.length,
        '0.85+': scored.filter((s) => s.finalScore >= 0.85).length,
        '0.7-0.85': countFinalIn(0.7, 0.85),
        '0.6-0.7': countFinalIn(0.6, 0.7),
        '0.5-0.6': countFinalIn(0.5, 0.6),
        '<0.5': scored.filter((s) => s.finalScore < 0.5).length,
        passThreshold: scored.filter((s) => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT).length,
        threshold: CONFIG.MIN_SIMILARITY_EVENT,
    };

    // The threshold is applied to finalScore (bonus included).
    const candidates = scored
        .filter((s) => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
        .sort((a, b) => b.finalScore - a.finalScore)
        .slice(0, CONFIG.CANDIDATE_EVENTS);
    console.log('[searchEvents] 过滤后candidates:', candidates.length);

    // Dynamic K: take fewer when not enough candidates are good enough.
    const dynamicK = Math.min(CONFIG.MAX_EVENTS, candidates.length);

    const selected = mmrSelect(
        candidates,
        dynamicK,
        CONFIG.MMR_LAMBDA,
        (c) => c.vector,
        (c) => c.finalScore,
    );

    return selected
        .sort((a, b) => b.finalScore - a.finalScore)
        .map((s) => ({
            event: s.event,
            similarity: s.finalScore,
            _recallType: s.isDirect ? 'DIRECT' : 'SIMILAR',
            _recallReason: s.reasons.length ? s.reasons.join('+') : '相似',
            _preFilterDistribution: preFilterDistribution,
        }));
}
|
||
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
// 日志:因果树格式化
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
/**
 * Render the traced causal ancestors as a text section for the recall log,
 * grouped by the recalled event each chain starts from.
 *
 * @param {Array<{event: object, depth: number, chainFrom: Array}>} causalEvents
 * @param {Array<{event: object}>} [recalledEvents] - Used to look up the title
 *   of each chain source; the source id is shown when no title is found.
 * @returns {string} The formatted section, or '' when there are no causal events.
 */
function formatCausalTree(causalEvents, recalledEvents) {
    if (!causalEvents?.length) return '';

    const lines = [
        '',
        '┌─────────────────────────────────────────────────────────────┐',
        '│ 【因果链追溯】 │',
        '└─────────────────────────────────────────────────────────────┘',
    ];

    // Title lookup by id; the first recalled item with a given id wins.
    const recalledById = new Map();
    for (const r of recalledEvents || []) {
        const id = r?.event?.id;
        if (!recalledById.has(id)) recalledById.set(id, r);
    }

    // Group ancestors by the source(s) that reach them.
    const bySource = new Map();
    for (const c of causalEvents) {
        for (const src of c.chainFrom || []) {
            if (!bySource.has(src)) bySource.set(src, []);
            bySource.get(src).push(c);
        }
    }

    for (const [sourceId, ancestors] of bySource) {
        const sourceTitle = recalledById.get(sourceId)?.event?.title || sourceId;
        lines.push(` ${sourceId} "${sourceTitle}" 的前因链:`);

        // Nearest ancestors first.
        ancestors.sort((a, b) => a.depth - b.depth);

        for (const c of ancestors) {
            // One extra space per hop; clamped so an unexpected depth < 1
            // cannot make repeat() throw.
            const indent = ' ' + ' '.repeat(Math.max(0, c.depth - 1) || 0);
            const ev = c.event;
            const title = ev.title || '(无标题)';
            const refs = c.chainFrom.length > 1 ? ` [被${c.chainFrom.length}条链引用]` : '';
            lines.push(`${indent}└─ [depth=${c.depth}] ${ev.id} "${title}"${refs}`);
        }
    }

    lines.push('');
    return lines.join('\n');
}
|
||
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
// 日志:主报告
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
/**
 * Build the human-readable recall report.
 *
 * @param {object} params
 * @param {number} params.elapsed - Elapsed time in milliseconds.
 * @param {string[]} params.segments - Query segments, oldest first.
 * @param {number[]} [params.weights] - Weight of each segment.
 * @param {Array<object>} params.chunkResults - L1 results (text, floor, speaker, similarity).
 * @param {Array<object>} params.eventResults - L2 results as returned by searchEvents.
 * @param {Array<object>} params.allEvents - All events (only the count is shown).
 * @param {string[]} [params.queryEntities] - Entities extracted from the query.
 * @param {Array<object>} [params.causalEvents] - Causal ancestors to list.
 * @param {object|null} [params.chunkPreFilterStats] - L1 pre-filter statistics.
 * @returns {string} Multi-line report text.
 */
function formatRecallLog({ elapsed, segments, weights, chunkResults, eventResults, allEvents, queryEntities, causalEvents = [], chunkPreFilterStats = null }) {
    const BOX_TOP = '┌─────────────────────────────────────────────────────────────┐';
    const BOX_BOTTOM = '└─────────────────────────────────────────────────────────────┘';

    const lines = [
        '╔══════════════════════════════════════════════════════════════╗',
        '║ 记忆召回报告 ║',
        '╠══════════════════════════════════════════════════════════════╣',
        `║ 耗时: ${elapsed}ms`,
        '╚══════════════════════════════════════════════════════════════╝',
        '',
        BOX_TOP,
        // Read the live configuration so the header cannot drift from it.
        `│ 【查询构建】最近 ${CONFIG.QUERY_MSG_COUNT} 条消息,指数衰减加权 (β=${CONFIG.QUERY_DECAY_BETA}) │`,
        '│ 权重越高 = 对召回方向影响越大 │',
        BOX_BOTTOM,
    ];

    // Segments, highest weight first.
    const segmentsSorted = (segments || [])
        .map((s, i) => ({
            idx: i + 1,
            weight: weights?.[i] ?? 0,
            text: s,
        }))
        .sort((a, b) => b.weight - a.weight);

    segmentsSorted.forEach((s, rank) => {
        const bar = '█'.repeat(Math.round(s.weight * 20));
        const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text;
        const marker = rank === 0 ? ' ◀ 主导' : '';
        lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
    });

    lines.push('');
    lines.push(BOX_TOP);
    lines.push('│ 【提取实体】用于判断"亲身经历"(DIRECT) │');
    lines.push(BOX_BOTTOM);
    lines.push(` ${queryEntities?.length ? queryEntities.join('、') : '(无)'}`);

    lines.push('');
    lines.push(BOX_TOP);
    lines.push('│ 【L1 原文片段】 │');
    lines.push(BOX_BOTTOM);

    if (chunkPreFilterStats) {
        const dist = chunkPreFilterStats.distribution || {};
        lines.push(` 过滤前: ${chunkPreFilterStats.total} 条`);
        lines.push(' 相似度分布:');
        lines.push(` 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0}`);
        lines.push(` 0.55-0.6: ${dist['0.55-0.6'] || 0} | <0.55: ${dist['<0.55'] || 0}`);
        lines.push(` 通过阈值(>=${chunkPreFilterStats.threshold}): ${chunkPreFilterStats.passThreshold} 条`);
        lines.push(` MMR+Floor去重后: ${chunkResults.length} 条`);
    } else {
        lines.push(` 召回: ${chunkResults.length} 条`);
    }

    // Only the first 15 chunks are listed.
    chunkResults.slice(0, 15).forEach((c, i) => {
        // A chunk without text must not break the whole report.
        const text = String(c.text ?? '');
        const preview = text.length > 50 ? text.slice(0, 50) + '...' : text;
        lines.push(` ${String(i + 1).padStart(2)}. #${String(c.floor).padStart(3)} [${c.speaker}] ${preview}`);
        lines.push(` 相似度: ${c.similarity.toFixed(3)}`);
    });

    if (chunkResults.length > 15) {
        lines.push(` ... 还有 ${chunkResults.length - 15} 条`);
    }

    lines.push('');
    lines.push(BOX_TOP);
    lines.push('│ 【L2 事件记忆】 │');
    lines.push('│ DIRECT=亲身经历 SIMILAR=相关背景 │');
    lines.push(BOX_BOTTOM);

    eventResults.forEach((e, i) => {
        const type = e._recallType === 'DIRECT' ? '★ DIRECT ' : ' SIMILAR';
        const title = e.event.title || '(无标题)';
        lines.push(` ${String(i + 1).padStart(2)}. ${type} ${title}`);
        lines.push(` 相似度: ${e.similarity.toFixed(3)} | 原因: ${e._recallReason}`);
    });

    // Statistics.
    const directCount = eventResults.filter((e) => e._recallType === 'DIRECT').length;
    const similarCount = eventResults.filter((e) => e._recallType === 'SIMILAR').length;
    // Every event result carries the same distribution object; read the first.
    const preFilterDist = eventResults[0]?._preFilterDistribution || {};

    lines.push('');
    lines.push(BOX_TOP);
    lines.push('│ 【统计】 │');
    lines.push(BOX_BOTTOM);
    lines.push(` L1 片段: ${chunkResults.length} 条`);
    lines.push(` L2 事件: ${eventResults.length} / ${allEvents.length} 条 (DIRECT: ${directCount}, SIMILAR: ${similarCount})`);
    if (preFilterDist.total) {
        lines.push(` L2 过滤前分布(${preFilterDist.total} 条,含bonus):`);
        lines.push(` 0.85+: ${preFilterDist['0.85+'] || 0} | 0.7-0.85: ${preFilterDist['0.7-0.85'] || 0} | 0.6-0.7: ${preFilterDist['0.6-0.7'] || 0}`);
        lines.push(` 0.5-0.6: ${preFilterDist['0.5-0.6'] || 0} | <0.5: ${preFilterDist['<0.5'] || 0}`);
        // Fall back to the configured threshold rather than a stale literal.
        lines.push(` 通过阈值(>=${preFilterDist.threshold ?? CONFIG.MIN_SIMILARITY_EVENT}): ${preFilterDist.passThreshold || 0} 条`);
    }
    lines.push(` 实体命中: ${queryEntities?.length || 0} 个`);
    if (causalEvents.length) lines.push(` 因果链追溯: ${causalEvents.length} 条`);
    lines.push('');

    // Append the causal tree details.
    lines.push(formatCausalTree(causalEvents, eventResults));

    return lines.join('\n');
}
|
||
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
// 主入口
|
||
// ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
/**
 * Main entry point: recall L1 chunks, L2 events and their causal ancestors
 * for the current chat.
 *
 * @param {string} queryText - Extra query text; only used for entity extraction.
 * @param {Array<object>} allEvents - All known events.
 * @param {object} vectorConfig - Embedding configuration.
 * @param {object} [options]
 * @param {boolean} [options.excludeLastAi] - Ignore a trailing non-user message
 *   when building the query.
 * @param {string|null} [options.pendingUserMessage] - User message that may not
 *   be in the chat yet.
 * @returns {Promise<{events: Array, causalEvents: Array, chunks: Array,
 *   elapsed: number, logText: string, queryEntities: string[]}>}
 *   Every return path has this shape; the lists are empty when there are no
 *   events, embedding fails, or the query vector is empty.
 */
export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) {
    const T0 = performance.now();
    const { chat } = getContext();
    const store = getSummaryStore();
    const { pendingUserMessage = null } = options;

    // Same shape as the success result, so callers can always destructure
    // `causalEvents` and `queryEntities`.
    const emptyResult = (elapsed, logText) => ({
        events: [],
        causalEvents: [],
        chunks: [],
        elapsed,
        logText,
        queryEntities: [],
    });

    if (!allEvents?.length) {
        return emptyResult(0, 'No events.');
    }

    const segments = buildQuerySegments(chat, CONFIG.QUERY_MSG_COUNT, !!options.excludeLastAi, pendingUserMessage);

    let queryVector;
    let weights;
    try {
        const result = await embedWeightedQuery(segments, vectorConfig);
        queryVector = result?.vector;
        weights = result?.weights;
    } catch (e) {
        xbLog.error(MODULE_ID, '查询向量生成失败', e);
        return emptyResult(Math.round(performance.now() - T0), 'Query embedding failed.');
    }

    if (!queryVector?.length) {
        return emptyResult(Math.round(performance.now() - T0), 'Empty query vector.');
    }

    const lexicon = buildEntityLexicon(store, allEvents);
    const queryEntities = extractEntities([queryText, ...segments].join('\n'), lexicon);

    const [chunkResults, eventResults] = await Promise.all([
        searchChunks(queryVector, vectorConfig),
        searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities),
    ]);

    const chunkPreFilterStats = chunkResults._preFilterStats || null;

    // ---------------------------------------------------------------------
    // Causal chain tracing: find the ancestors of the recalled events.
    // Ancestors that were themselves recalled are not injected a second time.
    // ---------------------------------------------------------------------
    const eventIndex = buildEventIndex(allEvents);
    const causalMap = traceCausalAncestors(eventResults, eventIndex);

    const recalledIdSet = new Set(eventResults.map((x) => x?.event?.id).filter(Boolean));
    const causalEvents = Array.from(causalMap.values())
        .filter((x) => x?.event?.id && !recalledIdSet.has(x.event.id))
        .map((x) => ({
            event: x.event,
            similarity: 0,
            _recallType: 'CAUSAL',
            _recallReason: `因果链(${x.chainFrom.join(',')})`,
            _causalDepth: x.depth,
            _chainFrom: x.chainFrom,
            chainFrom: x.chainFrom,
            depth: x.depth,
        }));

    // Order by reference count > depth > event number, then truncate.
    sortCausalEvents(causalEvents);
    const causalEventsTruncated = causalEvents.slice(0, CONFIG.CAUSAL_INJECT_MAX);

    const elapsed = Math.round(performance.now() - T0);
    const logText = formatRecallLog({
        elapsed,
        queryText,
        segments,
        weights,
        chunkResults,
        eventResults,
        allEvents,
        queryEntities,
        causalEvents: causalEventsTruncated,
        chunkPreFilterStats,
    });

    console.group('%c[Recall]', 'color: #7c3aed; font-weight: bold');
    console.log(`Elapsed: ${elapsed}ms | Entities: ${queryEntities.join(', ') || '(none)'}`);
    console.log(`L1: ${chunkResults.length} | L2: ${eventResults.length}/${allEvents.length} | Causal: ${causalEventsTruncated.length}`);
    console.groupEnd();

    return { events: eventResults, causalEvents: causalEventsTruncated, chunks: chunkResults, elapsed, logText, queryEntities };
}
|
||
|
||
/**
 * Build a plain-text query from the last few chat messages.
 *
 * @param {Array<{is_user?: boolean, name?: string, mes?: string}>} chat
 * @param {number} [count=2] - How many trailing messages to use.
 * @param {boolean} [excludeLastAi=false] - Drop the last message when it is not
 *   a user message.
 * @returns {string} One "speaker: text" line per message (text cut at 500
 *   characters), joined with newlines; messages that are empty after cleaning
 *   are omitted. '' for an empty chat.
 */
export function buildQueryText(chat, count = 2, excludeLastAi = false) {
    if (!chat?.length) return '';

    let messages = chat;
    if (excludeLastAi && !messages[messages.length - 1]?.is_user) {
        messages = messages.slice(0, -1);
    }

    const queryLines = [];
    for (const m of messages.slice(-count)) {
        const text = cleanForRecall(m.mes);
        // Skip messages with nothing left after cleaning, as
        // buildQuerySegments does; a bare "speaker: " line carries no signal.
        if (!text) continue;

        const speaker = m.name || (m.is_user ? '用户' : '角色');
        queryLines.push(`${speaker}: ${text.slice(0, 500)}`);
    }
    return queryLines.join('\n');
}
|