fix(recall): keep only matched atoms and filter evidence by focus entities

This commit is contained in:
2026-02-12 00:05:19 +08:00
parent 9f279d902f
commit 111cd081f6
2 changed files with 80 additions and 15 deletions

View File

@@ -53,6 +53,8 @@ const TOP_N_STAR = 5;
// L0 display text: threshold for choosing semicolon-joined vs. multi-line mode
const L0_JOINED_MAX_LENGTH = 120;
// Bypass threshold for background-evidence entity filtering (kept consistent with the event filtering strategy).
// shouldKeepEvidenceL0 keeps any L0 whose similarity is >= this value without checking its entities.
const EVIDENCE_ENTITY_BYPASS_SIM = 0.80;
// ─────────────────────────────────────────────────────────────────────────────
// 工具函数
@@ -123,6 +125,63 @@ function normalize(s) {
.toLowerCase();
}
/**
 * Gather the normalized entity names referenced by an L0 item
 * (used by the background-evidence entity filter).
 *
 * Reads the atom from `l0.atom`, falling back to `l0._atom`. Names are taken,
 * in this order, from the current structure (`who` entries, then the `s`/`t`
 * endpoints of each `edges` entry) and from the legacy structure
 * (`subject`, `object`). Every candidate is passed through `normalize`;
 * candidates that normalize to a falsy value are dropped.
 *
 * @param {object} l0 - L0 item; may be null/undefined or lack an atom.
 * @returns {Set<string>} Normalized entity names (empty when nothing is found).
 */
function collectL0Entities(l0) {
    const atom = l0?.atom || l0?._atom || {};

    // Build the full candidate list first so normalization happens in one pass.
    const candidates = [
        ...(atom.who || []),
        ...[...(atom.edges || [])].flatMap((edge) => [edge?.s, edge?.t]),
        // Legacy fields
        atom.subject,
        atom.object,
    ];

    const entities = new Set();
    candidates.forEach((raw) => {
        const key = normalize(raw);
        if (key) {
            entities.add(key);
        }
    });
    return entities;
}
/**
 * Decide whether a background-evidence L0 item is kept, filtering by focus entities.
 *
 * Rules, checked in order:
 *   1) No focus entities (missing or empty set): keep.
 *   2) `l0.similarity` >= EVIDENCE_ENTITY_BYPASS_SIM: keep (bypass).
 *   3) Any entity collected by `collectL0Entities` is a focus entity: keep.
 *   4) Legacy fallback: the normalized semantic text (or `l0.text` when the atom
 *      has no semantic text) contains a focus entity as a substring: keep.
 * Otherwise the item is filtered out.
 *
 * @param {object} l0 - L0 item; its atom may live under `atom` or `_atom`.
 * @param {Set<string>} focusSet - Focus entity names; the caller is expected to
 *   pass them already normalized, since they are compared as-is.
 * @returns {boolean} True when the item should be kept.
 */
function shouldKeepEvidenceL0(l0, focusSet) {
    if (!focusSet?.size) return true;
    if ((l0?.similarity || 0) >= EVIDENCE_ENTITY_BYPASS_SIM) return true;

    const focusNames = [...focusSet];

    const entities = collectL0Entities(l0);
    if (focusNames.some((name) => entities.has(name))) return true;

    // Legacy fallback: substring match against the semantic text.
    // Resolve the atom the same way collectL0Entities does, so items that carry
    // their atom under `_atom` are not silently excluded from this fallback.
    const atom = l0?.atom || l0?._atom || {};
    const textNorm = normalize(atom.semantic || l0?.text || '');
    // Skip empty names: ''.includes('') is always true and would keep everything.
    return focusNames.some((name) => Boolean(name) && textNorm.includes(name));
}
/**
* 获取事件排序键
* @param {object} event - 事件对象
@@ -894,8 +953,11 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities,
const keepVisible = store.keepVisibleCount ?? 3;
// 收集未被事件消费的 L0按 rerankScore 降序
const focusSetForEvidence = new Set((focusEntities || []).map(normalize).filter(Boolean));
const remainingL0 = l0Selected
.filter(l0 => !usedL0Ids.has(l0.id))
.filter(l0 => shouldKeepEvidenceL0(l0, focusSetForEvidence))
.sort((a, b) => (b.rerankScore || 0) - (a.rerankScore || 0));
// 远期floor <= lastSummarized