/**
 * Similarity at or above which a background-evidence L0 item bypasses the
 * focus-entity filter (per the original note, this mirrors the event
 * filtering strategy).
 */
const EVIDENCE_ENTITY_BYPASS_SIM = 0.80;

/**
 * Returns the atom payload carried by an L0 item.
 * Items may expose it as `atom` or as `_atom`; an empty object is returned
 * when neither is present so callers can read fields unconditionally.
 * @param {object} l0
 * @returns {object}
 */
function resolveL0Atom(l0) {
    return l0?.atom || l0?._atom || {};
}

/**
 * Collects the normalized entity names referenced by an L0 item
 * (used by the background-evidence entity filter).
 *
 * Reads the current atom shape (`who`, `edges[].s`, `edges[].t`) and the
 * legacy shape (`subject`, `object`). Every name goes through `normalize`;
 * names that normalize to an empty string are dropped.
 *
 * @param {object} l0 - L0 item carrying `atom` or `_atom`.
 * @returns {Set<string>} Normalized entity names (possibly empty).
 */
function collectL0Entities(l0) {
    const atom = resolveL0Atom(l0);
    const entities = new Set();

    const add = (value) => {
        // Missing fields are skipped up front so they never reach normalize().
        if (value == null) return;
        const name = normalize(value);
        if (name) entities.add(name);
    };

    // A bare string is one entity, not a sequence of characters; anything
    // that is not iterable is ignored instead of throwing in for...of.
    const asIterable = (value) => {
        if (typeof value === 'string') return [value];
        return typeof value?.[Symbol.iterator] === 'function' ? value : [];
    };

    for (const who of asIterable(atom.who)) add(who);
    for (const edge of asIterable(atom.edges)) {
        add(edge?.s);
        add(edge?.t);
    }

    // Legacy data shape.
    add(atom.subject);
    add(atom.object);

    return entities;
}

/**
 * Decides whether a background-evidence L0 item is kept, filtering by the
 * focus entities.
 *
 * Rules, checked in order:
 *   1) no focus entities                         -> keep
 *   2) similarity >= EVIDENCE_ENTITY_BYPASS_SIM  -> keep (bypass)
 *   3) an entity from who/edges/subject/object
 *      is a focus entity                         -> keep
 *   4) legacy fallback: the normalized semantic
 *      text contains a focus entity              -> keep
 * Otherwise the item is filtered out.
 *
 * @param {object} l0 - L0 item (`similarity`, `atom`/`_atom`, `text`).
 * @param {Set<string>} focusSet - Focus entity names, already normalized
 *   (the visible call site builds this set by mapping `normalize` over the
 *   focus entities and dropping empty results).
 * @returns {boolean} `true` when the item should be kept.
 */
function shouldKeepEvidenceL0(l0, focusSet) {
    if (!focusSet?.size) return true;
    if ((l0?.similarity || 0) >= EVIDENCE_ENTITY_BYPASS_SIM) return true;

    const entities = collectL0Entities(l0);
    for (const focus of focusSet) {
        if (entities.has(focus)) return true;
    }

    // Legacy fallback: substring match against the semantic text. The atom is
    // resolved the same way as in collectL0Entities so `_atom` items are
    // covered as well.
    const textNorm = normalize(resolveL0Atom(l0).semantic || l0?.text || '');
    if (!textNorm) return false;
    for (const focus of focusSet) {
        if (focus && textNorm.includes(focus)) return true;
    }
    return false;
}