From 7eaf411ff9aadd181f488a84b914fc394f6bbebc Mon Sep 17 00:00:00 2001 From: bielie Date: Sun, 15 Feb 2026 01:28:35 +0800 Subject: [PATCH] move final L1 pairing after diffusion and relax ppr epsilon --- .../vector/retrieval/diffusion.js | 2 +- .../story-summary/vector/retrieval/recall.js | 146 +++++++++++++----- 2 files changed, 106 insertions(+), 42 deletions(-) diff --git a/modules/story-summary/vector/retrieval/diffusion.js b/modules/story-summary/vector/retrieval/diffusion.js index b26291c..300a2f9 100644 --- a/modules/story-summary/vector/retrieval/diffusion.js +++ b/modules/story-summary/vector/retrieval/diffusion.js @@ -42,7 +42,7 @@ const MODULE_ID = 'diffusion'; const CONFIG = { // PPR parameters (Page et al. 1998; GraftNet 2018 uses same values) ALPHA: 0.15, // restart probability - EPSILON: 1e-6, // L1 convergence threshold + EPSILON: 1e-5, // L1 convergence threshold MAX_ITER: 50, // hard iteration cap (typically converges in 15-25) // Edge weight channel coefficients diff --git a/modules/story-summary/vector/retrieval/recall.js b/modules/story-summary/vector/retrieval/recall.js index 28794eb..ff0e601 100644 --- a/modules/story-summary/vector/retrieval/recall.js +++ b/modules/story-summary/vector/retrieval/recall.js @@ -517,12 +517,12 @@ function fuseByFloor(denseRank, lexRank, cap = CONFIG.FUSION_CAP) { } // ═══════════════════════════════════════════════════════════════════════════ -// [Stage 6] Floor 融合 + Rerank + L1 配对 +// [Stage 6] Floor 融合 + Rerank // ═══════════════════════════════════════════════════════════════════════════ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexicalResult, metrics) { const { chatId, chat, name1, name2 } = getContext(); - if (!chatId) return { l0Selected: [], l1ByFloor: new Map() }; + if (!chatId) return { l0Selected: [], l1ScoredByFloor: new Map() }; const T_Start = performance.now(); @@ -627,7 +627,7 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic metrics.evidence.l1CosineTime = 0; metrics.evidence.rerankApplied = false; } - return { l0Selected: [], l1ByFloor: new Map() }; + return { l0Selected: [], l1ScoredByFloor: new Map() }; } // ───────────────────────────────────────────────────────────────── @@ -645,16 +645,6 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic const l1ScoredByFloor = await pullAndScoreL1(chatId, [...floorsToFetch], queryVector, chat); - if (metrics) { - let totalPulled = 0; - for (const [key, chunks] of l1ScoredByFloor) { - if (key === '_cosineTime') continue; - totalPulled += chunks.length; - } - metrics.evidence.l1Pulled = totalPulled; - metrics.evidence.l1CosineTime = l1ScoredByFloor._cosineTime || 0; - } - // ───────────────────────────────────────────────────────────────── // 6e. 构建 rerank documents(每个 floor: USER chunks + AI chunks) // ───────────────────────────────────────────────────────────────── @@ -728,7 +718,7 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic } // ───────────────────────────────────────────────────────────────── - // 6g. 收集 L0 atoms + L1 top-1 配对 + // 6g. 收集 L0 atoms // ───────────────────────────────────────────────────────────────── // 仅保留“真实 dense 命中”的 L0 原子: @@ -749,8 +739,6 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic } const l0Selected = []; - const l1ByFloor = new Map(); - let contextPairsAdded = 0; for (const item of reranked) { const floor = item.floor; @@ -770,34 +758,16 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic }); } - const aiChunks = l1ScoredByFloor.get(floor) || []; - const userFloor = floor - 1; - const userChunks = (userFloor >= 0 && chat?.[userFloor]?.is_user) - ? (l1ScoredByFloor.get(userFloor) || []) - : []; - - const aiTop1 = aiChunks.length > 0 - ? aiChunks.reduce((best, c) => (c._cosineScore > best._cosineScore ? c : best)) - : null; - const userTop1 = userChunks.length > 0 - ? userChunks.reduce((best, c) => (c._cosineScore > best._cosineScore ? c : best)) - : null; - - if (userTop1) contextPairsAdded++; - l1ByFloor.set(floor, { aiTop1, userTop1 }); } if (metrics) { metrics.evidence.floorsSelected = reranked.length; metrics.evidence.l0Collected = l0Selected.length; - let totalAttached = 0; - for (const [, pair] of l1ByFloor) { - if (pair.aiTop1) totalAttached++; - if (pair.userTop1) totalAttached++; - } - metrics.evidence.l1Attached = totalAttached; - metrics.evidence.contextPairsAdded = contextPairsAdded; + metrics.evidence.l1Pulled = 0; + metrics.evidence.l1Attached = 0; + metrics.evidence.l1CosineTime = 0; + metrics.evidence.contextPairsAdded = 0; } const totalTime = Math.round(performance.now() - T_Start); @@ -806,10 +776,10 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic } xbLog.info(MODULE_ID, - `Evidence: ${denseFloorRank.length} dense floors + ${lexFloorRank.length} lex floors (${lexFloorFilteredByDense} lex filtered by dense) → fusion=${fusedFloors.length} → rerank=${reranked.length} floors → L0=${l0Selected.length} L1 attached=${metrics?.evidence?.l1Attached || 0} (${totalTime}ms)` + `Evidence: ${denseFloorRank.length} dense floors + ${lexFloorRank.length} lex floors (${lexFloorFilteredByDense} lex filtered by dense) → fusion=${fusedFloors.length} → rerank=${reranked.length} floors → L0=${l0Selected.length} (${totalTime}ms)` ); - return { l0Selected, l1ByFloor }; + return { l0Selected, l1ScoredByFloor }; } // ═══════════════════════════════════════════════════════════════════════════ @@ -885,6 +855,93 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) { return result; } +async function buildL1PairsForSelectedFloors(l0Selected, queryVector, prefetchedL1ByFloor, metrics) { + const T0 = performance.now(); + const { chatId, chat } = getContext(); + + const l1ByFloor = new Map(); + if (!chatId || !queryVector?.length || !l0Selected?.length) { + if (metrics) { + metrics.evidence.l1Pulled = 0; + metrics.evidence.l1Attached = 0; + metrics.evidence.l1CosineTime = 0; + metrics.evidence.contextPairsAdded = 0; + } + return l1ByFloor; + } + + const requiredFloors = new Set(); + const selectedFloors = new Set(); + for (const l0 of l0Selected) { + const floor = Number(l0?.floor); + if (!Number.isInteger(floor) || floor < 0) continue; + selectedFloors.add(floor); + requiredFloors.add(floor); + const userFloor = floor - 1; + if (userFloor >= 0 && chat?.[userFloor]?.is_user) { + requiredFloors.add(userFloor); + } + } + + const merged = new Map(); + const prefetched = prefetchedL1ByFloor || new Map(); + let totalCosineTime = Number(prefetched._cosineTime || 0); + + for (const [floor, chunks] of prefetched) { + if (!requiredFloors.has(floor)) continue; + merged.set(floor, chunks); + } + + const missingFloors = [...requiredFloors].filter(f => !merged.has(f)); + if (missingFloors.length > 0) { + const extra = await pullAndScoreL1(chatId, missingFloors, queryVector, chat); + totalCosineTime += Number(extra._cosineTime || 0); + for (const [floor, chunks] of extra) { + if (floor === '_cosineTime') continue; + if (!requiredFloors.has(floor)) continue; + merged.set(floor, chunks); + } + } + + let contextPairsAdded = 0; + let totalAttached = 0; + for (const floor of selectedFloors) { + const aiChunks = merged.get(floor) || []; + const userFloor = floor - 1; + const userChunks = (userFloor >= 0 && chat?.[userFloor]?.is_user) + ? (merged.get(userFloor) || []) + : []; + + const aiTop1 = aiChunks.length > 0 + ? aiChunks.reduce((best, c) => (c._cosineScore > best._cosineScore ? c : best)) + : null; + const userTop1 = userChunks.length > 0 + ? userChunks.reduce((best, c) => (c._cosineScore > best._cosineScore ? c : best)) + : null; + + if (aiTop1) totalAttached++; + if (userTop1) { + totalAttached++; + contextPairsAdded++; + } + l1ByFloor.set(floor, { aiTop1, userTop1 }); + } + + if (metrics) { + let totalPulled = 0; + for (const [, chunks] of merged) { + totalPulled += chunks.length; + } + metrics.evidence.l1Pulled = totalPulled; + metrics.evidence.l1Attached = totalAttached; + metrics.evidence.l1CosineTime = totalCosineTime; + metrics.evidence.contextPairsAdded = contextPairsAdded; + metrics.timing.evidenceRetrieval += Math.round(performance.now() - T0); + } + + return l1ByFloor; +} + // ═══════════════════════════════════════════════════════════════════════════ // 主函数 // ═══════════════════════════════════════════════════════════════════════════ @@ -1173,7 +1230,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) { // 阶段 6: Floor 粒度融合 + Rerank + L1 配对 // ═══════════════════════════════════════════════════════════════════ - const { l0Selected, l1ByFloor } = await locateAndPullEvidence( + const { l0Selected, l1ScoredByFloor } = await locateAndPullEvidence( anchorHits, queryVector_v1, bundle.rerankQuery, @@ -1259,6 +1316,13 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) { `L0-linked events: ${recalledL0Floors.size} floors → ${l0LinkedCount} events linked (sim≥${CONFIG.LEXICAL_EVENT_DENSE_MIN})` ); + const l1ByFloor = await buildL1PairsForSelectedFloors( + l0Selected, + queryVector_v1, + l1ScoredByFloor, + metrics + ); + // ═══════════════════════════════════════════════════════════════════ // 阶段 9: Causation Trace // ═══════════════════════════════════════════════════════════════════