fix(recall): keep only matched atoms and filter evidence by focus entities
This commit is contained in:
@@ -53,6 +53,8 @@ const TOP_N_STAR = 5;
|
|||||||
|
|
||||||
// L0 显示文本:分号拼接 vs 多行模式的阈值
|
// L0 显示文本:分号拼接 vs 多行模式的阈值
|
||||||
const L0_JOINED_MAX_LENGTH = 120;
|
const L0_JOINED_MAX_LENGTH = 120;
|
||||||
|
// 背景证据实体过滤旁通阈值(与事件过滤策略一致)
|
||||||
|
const EVIDENCE_ENTITY_BYPASS_SIM = 0.80;
|
||||||
|
|
||||||
// ─────────────────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
// 工具函数
|
// 工具函数
|
||||||
@@ -123,6 +125,63 @@ function normalize(s) {
|
|||||||
.toLowerCase();
|
.toLowerCase();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collect the normalized entity names referenced by an L0 item (used when
 * filtering background evidence by focus entity).
 *
 * The atom is read from `l0.atom`, falling back to `l0._atom`. Both the current
 * structure (`who` list plus `edges` with `s`/`t` endpoints) and the legacy
 * `subject`/`object` fields are supported.
 *
 * @param {object} l0 - L0 item; may be null/undefined.
 * @returns {Set<string>} Normalized, non-empty entity names.
 */
function collectL0Entities(l0) {
    const atom = l0?.atom || l0?._atom || {};
    const entities = new Set();

    const add = (value) => {
        // Skip absent fields up front so missing values are never handed to normalize().
        if (value == null) return;
        const normalized = normalize(value);
        if (normalized) entities.add(normalized);
    };

    // A bare string counts as one entity (never iterate it character by character);
    // any other non-array shape is ignored instead of throwing.
    const asList = (value) => {
        if (Array.isArray(value)) return value;
        return typeof value === 'string' ? [value] : [];
    };

    for (const who of asList(atom.who)) add(who);

    for (const edge of (Array.isArray(atom.edges) ? atom.edges : [])) {
        add(edge?.s);
        add(edge?.t);
    }

    // Legacy data compatibility
    add(atom.subject);
    add(atom.object);

    return entities;
}
|
||||||
|
|
||||||
|
/**
 * Decide whether a background-evidence L0 item is kept, filtering by focus entities.
 *
 * Rules, checked in order:
 *   1) No focus entities: keep.
 *   2) similarity >= EVIDENCE_ENTITY_BYPASS_SIM: keep (bypass).
 *   3) An entity collected from the item (who/edges/subject/object) is a focus entity: keep.
 *   4) Legacy fallback: the normalized semantic text contains a focus entity: keep.
 * Otherwise the item is filtered out.
 *
 * @param {object} l0 - L0 item; may be null/undefined.
 * @param {Set<string>} focusSet - Focus entity names (expected to be already normalized).
 * @returns {boolean} true when the item should be kept.
 */
function shouldKeepEvidenceL0(l0, focusSet) {
    if (!focusSet?.size) return true;
    if ((l0?.similarity || 0) >= EVIDENCE_ENTITY_BYPASS_SIM) return true;

    const entities = collectL0Entities(l0);
    for (const focus of focusSet) {
        if (entities.has(focus)) return true;
    }

    // Legacy fallback: substring match against the semantic text. The atom may
    // live under `atom` or `_atom`, matching what collectL0Entities accepts.
    const rawText = l0?.atom?.semantic || l0?._atom?.semantic || l0?.text || '';
    const textNorm = normalize(rawText);
    for (const focus of focusSet) {
        // Guard against an empty focus string, which every text "includes".
        if (focus && textNorm.includes(focus)) return true;
    }

    return false;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 获取事件排序键
|
* 获取事件排序键
|
||||||
* @param {object} event - 事件对象
|
* @param {object} event - 事件对象
|
||||||
@@ -894,8 +953,11 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities,
|
|||||||
const keepVisible = store.keepVisibleCount ?? 3;
|
const keepVisible = store.keepVisibleCount ?? 3;
|
||||||
|
|
||||||
// 收集未被事件消费的 L0,按 rerankScore 降序
|
// 收集未被事件消费的 L0,按 rerankScore 降序
|
||||||
|
const focusSetForEvidence = new Set((focusEntities || []).map(normalize).filter(Boolean));
|
||||||
|
|
||||||
const remainingL0 = l0Selected
|
const remainingL0 = l0Selected
|
||||||
.filter(l0 => !usedL0Ids.has(l0.id))
|
.filter(l0 => !usedL0Ids.has(l0.id))
|
||||||
|
.filter(l0 => shouldKeepEvidenceL0(l0, focusSetForEvidence))
|
||||||
.sort((a, b) => (b.rerankScore || 0) - (a.rerankScore || 0));
|
.sort((a, b) => (b.rerankScore || 0) - (a.rerankScore || 0));
|
||||||
|
|
||||||
// 远期:floor <= lastSummarized
|
// 远期:floor <= lastSummarized
|
||||||
|
|||||||
@@ -723,18 +723,21 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
|
|||||||
// 6g. 收集 L0 atoms + L1 top-1 配对
|
// 6g. 收集 L0 atoms + L1 top-1 配对
|
||||||
// ─────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
const atomsList = getStateAtoms();
|
// 仅保留“真实 dense 命中”的 L0 原子:
|
||||||
const atomsByFloor = new Map();
|
// 旧逻辑按 floor 全塞,容易把同层无关原子带进来。
|
||||||
for (const atom of atomsList) {
|
const atomById = new Map(getStateAtoms().map(a => [a.atomId, a]));
|
||||||
if (typeof atom.floor !== 'number' || atom.floor < 0) continue;
|
const matchedAtomsByFloor = new Map();
|
||||||
if (!atomsByFloor.has(atom.floor)) atomsByFloor.set(atom.floor, []);
|
for (const hit of (anchorHits || [])) {
|
||||||
atomsByFloor.get(atom.floor).push(atom);
|
const atom = hit.atom || atomById.get(hit.atomId);
|
||||||
|
if (!atom) continue;
|
||||||
|
if (!matchedAtomsByFloor.has(hit.floor)) matchedAtomsByFloor.set(hit.floor, []);
|
||||||
|
matchedAtomsByFloor.get(hit.floor).push({
|
||||||
|
atom,
|
||||||
|
similarity: hit.similarity,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
for (const arr of matchedAtomsByFloor.values()) {
|
||||||
const denseFloorMaxMap = new Map();
|
arr.sort((a, b) => b.similarity - a.similarity);
|
||||||
for (const a of (anchorHits || [])) {
|
|
||||||
const cur = denseFloorMaxMap.get(a.floor) || 0;
|
|
||||||
if (a.similarity > cur) denseFloorMaxMap.set(a.floor, a.similarity);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const l0Selected = [];
|
const l0Selected = [];
|
||||||
@@ -744,15 +747,15 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
|
|||||||
for (const item of reranked) {
|
for (const item of reranked) {
|
||||||
const floor = item.floor;
|
const floor = item.floor;
|
||||||
const rerankScore = item._rerankScore || 0;
|
const rerankScore = item._rerankScore || 0;
|
||||||
const denseSim = denseFloorMaxMap.get(floor) || 0;
|
|
||||||
|
|
||||||
const floorAtoms = atomsByFloor.get(floor) || [];
|
// 仅收集该 floor 中真实命中的 L0 atoms
|
||||||
for (const atom of floorAtoms) {
|
const floorMatchedAtoms = matchedAtomsByFloor.get(floor) || [];
|
||||||
|
for (const { atom, similarity } of floorMatchedAtoms) {
|
||||||
l0Selected.push({
|
l0Selected.push({
|
||||||
id: `anchor-${atom.atomId}`,
|
id: `anchor-${atom.atomId}`,
|
||||||
atomId: atom.atomId,
|
atomId: atom.atomId,
|
||||||
floor: atom.floor,
|
floor: atom.floor,
|
||||||
similarity: denseSim,
|
similarity,
|
||||||
rerankScore,
|
rerankScore,
|
||||||
atom,
|
atom,
|
||||||
text: atom.semantic || '',
|
text: atom.semantic || '',
|
||||||
|
|||||||
Reference in New Issue
Block a user