fix(recall): keep only matched atoms and filter evidence by focus entities

This commit is contained in:
2026-02-12 00:05:19 +08:00
parent 9f279d902f
commit 111cd081f6
2 changed files with 80 additions and 15 deletions

View File

@@ -53,6 +53,8 @@ const TOP_N_STAR = 5;
// L0 显示文本:分号拼接 vs 多行模式的阈值 // L0 显示文本:分号拼接 vs 多行模式的阈值
const L0_JOINED_MAX_LENGTH = 120; const L0_JOINED_MAX_LENGTH = 120;
// Similarity threshold at or above which background evidence bypasses the focus-entity filter (kept in line with the event filtering strategy)
const EVIDENCE_ENTITY_BYPASS_SIM = 0.80;
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// 工具函数 // 工具函数
@@ -123,6 +125,63 @@ function normalize(s) {
.toLowerCase(); .toLowerCase();
} }
/**
 * Gather the normalized entity names attached to an L0 item, for use by the
 * background-evidence entity filter.
 *
 * Reads the current atom shape (`who` list and `edges` with `s`/`t` endpoints)
 * as well as the legacy `subject`/`object` fields. The atom is taken from
 * `l0.atom`, falling back to `l0._atom`.
 *
 * @param {object} l0 - L0 item whose atom is inspected; may be null/undefined.
 * @returns {Set<string>} Normalized, non-empty entity names.
 */
function collectL0Entities(l0) {
  const source = l0?.atom || l0?._atom || {};

  // Candidate order mirrors the lookup priority: who, edge endpoints, legacy fields.
  const candidates = [
    ...(source.who || []),
    ...[...(source.edges || [])].flatMap((edge) => [edge?.s, edge?.t]),
    source.subject,
    source.object,
  ];

  const entities = new Set();
  for (const candidate of candidates) {
    const key = normalize(candidate);
    // Values that normalize to an empty/falsy result are not entities.
    if (key) {
      entities.add(key);
    }
  }
  return entities;
}
/**
 * Decide whether a background-evidence L0 item survives focus-entity filtering.
 *
 * The item is kept when any of the following holds (checked in this order):
 *   1) there are no focus entities;
 *   2) its similarity is at least EVIDENCE_ENTITY_BYPASS_SIM (bypass);
 *   3) one of the entities returned by collectL0Entities is a focus entity;
 *   4) legacy fallback: its normalized semantic text contains a focus entity.
 * Otherwise it is filtered out.
 *
 * @param {object} l0 - L0 item; reads `similarity`, `atom` / `_atom` and `text`.
 * @param {Set<string>} focusSet - Focus entity names, already normalized
 *   (they are compared verbatim against normalized entities and text).
 * @returns {boolean} true to keep the item, false to drop it.
 */
function shouldKeepEvidenceL0(l0, focusSet) {
  if (!focusSet?.size) return true;
  if ((l0?.similarity || 0) >= EVIDENCE_ENTITY_BYPASS_SIM) return true;

  const entities = collectL0Entities(l0);
  for (const f of focusSet) {
    if (entities.has(f)) return true;
  }

  // Legacy fallback: substring match against the semantic text.
  // Resolve the atom the same way collectL0Entities does (`atom`, then `_atom`)
  // so items that only carry `_atom` are not silently excluded from this rule.
  const atom = l0?.atom || l0?._atom || {};
  const textNorm = normalize(atom.semantic || l0?.text || '');
  for (const f of focusSet) {
    // Guard against empty names: every string "includes" the empty string.
    if (f && textNorm.includes(f)) return true;
  }

  return false;
}
/** /**
* 获取事件排序键 * 获取事件排序键
* @param {object} event - 事件对象 * @param {object} event - 事件对象
@@ -894,8 +953,11 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities,
const keepVisible = store.keepVisibleCount ?? 3; const keepVisible = store.keepVisibleCount ?? 3;
// 收集未被事件消费的 L0按 rerankScore 降序 // 收集未被事件消费的 L0按 rerankScore 降序
const focusSetForEvidence = new Set((focusEntities || []).map(normalize).filter(Boolean));
const remainingL0 = l0Selected const remainingL0 = l0Selected
.filter(l0 => !usedL0Ids.has(l0.id)) .filter(l0 => !usedL0Ids.has(l0.id))
.filter(l0 => shouldKeepEvidenceL0(l0, focusSetForEvidence))
.sort((a, b) => (b.rerankScore || 0) - (a.rerankScore || 0)); .sort((a, b) => (b.rerankScore || 0) - (a.rerankScore || 0));
// 远期floor <= lastSummarized // 远期floor <= lastSummarized

View File

@@ -723,18 +723,21 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
// 6g. 收集 L0 atoms + L1 top-1 配对 // 6g. 收集 L0 atoms + L1 top-1 配对
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────
const atomsList = getStateAtoms(); // 仅保留“真实 dense 命中”的 L0 原子:
const atomsByFloor = new Map(); // 旧逻辑按 floor 全塞,容易把同层无关原子带进来。
for (const atom of atomsList) { const atomById = new Map(getStateAtoms().map(a => [a.atomId, a]));
if (typeof atom.floor !== 'number' || atom.floor < 0) continue; const matchedAtomsByFloor = new Map();
if (!atomsByFloor.has(atom.floor)) atomsByFloor.set(atom.floor, []); for (const hit of (anchorHits || [])) {
atomsByFloor.get(atom.floor).push(atom); const atom = hit.atom || atomById.get(hit.atomId);
if (!atom) continue;
if (!matchedAtomsByFloor.has(hit.floor)) matchedAtomsByFloor.set(hit.floor, []);
matchedAtomsByFloor.get(hit.floor).push({
atom,
similarity: hit.similarity,
});
} }
for (const arr of matchedAtomsByFloor.values()) {
const denseFloorMaxMap = new Map(); arr.sort((a, b) => b.similarity - a.similarity);
for (const a of (anchorHits || [])) {
const cur = denseFloorMaxMap.get(a.floor) || 0;
if (a.similarity > cur) denseFloorMaxMap.set(a.floor, a.similarity);
} }
const l0Selected = []; const l0Selected = [];
@@ -744,15 +747,15 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
for (const item of reranked) { for (const item of reranked) {
const floor = item.floor; const floor = item.floor;
const rerankScore = item._rerankScore || 0; const rerankScore = item._rerankScore || 0;
const denseSim = denseFloorMaxMap.get(floor) || 0;
const floorAtoms = atomsByFloor.get(floor) || []; // 仅收集该 floor 中真实命中的 L0 atoms
for (const atom of floorAtoms) { const floorMatchedAtoms = matchedAtomsByFloor.get(floor) || [];
for (const { atom, similarity } of floorMatchedAtoms) {
l0Selected.push({ l0Selected.push({
id: `anchor-${atom.atomId}`, id: `anchor-${atom.atomId}`,
atomId: atom.atomId, atomId: atom.atomId,
floor: atom.floor, floor: atom.floor,
similarity: denseSim, similarity,
rerankScore, rerankScore,
atom, atom,
text: atom.semantic || '', text: atom.semantic || '',