Add files via upload
This commit is contained in:
@@ -1,16 +1,3 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// lexical-index.js - MiniSearch 词法检索索引
|
||||
//
|
||||
// 职责:
|
||||
// 1. 对 L0 atoms + L1 chunks + L2 events 建立词法索引
|
||||
// 2. 提供词法检索接口(专名精确匹配兜底)
|
||||
// 3. 惰性构建 + 异步预热 + 缓存失效机制
|
||||
//
|
||||
// 索引存储:纯内存(不持久化)
|
||||
// 分词器:统一使用 tokenizer.js(结巴 + 实体保护 + 降级)
|
||||
// 重建时机:CHAT_CHANGED / L0提取完成 / L2总结完成
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import MiniSearch from '../../../../libs/minisearch.mjs';
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { getSummaryStore } from '../../data/store.js';
|
||||
@@ -20,76 +7,166 @@ import { tokenizeForIndex } from '../utils/tokenizer.js';
|
||||
|
||||
const MODULE_ID = 'lexical-index';
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 缓存
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** @type {MiniSearch|null} */
|
||||
// In-memory index cache
|
||||
let cachedIndex = null;
|
||||
|
||||
/** @type {string|null} */
|
||||
let cachedChatId = null;
|
||||
|
||||
/** @type {string|null} 数据指纹(atoms + chunks + events 数量) */
|
||||
let cachedFingerprint = null;
|
||||
|
||||
/** @type {boolean} 是否正在构建 */
|
||||
let building = false;
|
||||
|
||||
/** @type {Promise<MiniSearch|null>|null} 当前构建 Promise(防重入) */
|
||||
let buildPromise = null;
|
||||
/** @type {Map<number, string[]>} floor → 该楼层的 doc IDs(仅 L1 chunks) */
|
||||
|
||||
// floor -> chunk doc ids (L1 only)
|
||||
let floorDocIds = new Map();
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 工具函数
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// IDF stats over lexical docs (L1 chunks + L2 events)
|
||||
let termDfMap = new Map();
|
||||
let docTokenSets = new Map(); // docId -> Set<token>
|
||||
let lexicalDocCount = 0;
|
||||
|
||||
const IDF_MIN = 1.0;
|
||||
const IDF_MAX = 4.0;
|
||||
const BUILD_BATCH_SIZE = 500;
|
||||
|
||||
/**
|
||||
* 清理事件摘要(移除楼层标记)
|
||||
* @param {string} summary
|
||||
* @returns {string}
|
||||
*/
|
||||
function cleanSummary(summary) {
|
||||
return String(summary || '')
|
||||
.replace(/\s*\(#\d+(?:-\d+)?\)\s*$/, '')
|
||||
.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* 计算缓存指纹
|
||||
* @param {number} chunkCount
|
||||
* @param {number} eventCount
|
||||
* @returns {string}
|
||||
*/
|
||||
function computeFingerprint(chunkCount, eventCount) {
|
||||
return `${chunkCount}:${eventCount}`;
|
||||
function fnv1a32(input, seed = 0x811C9DC5) {
|
||||
let hash = seed >>> 0;
|
||||
const text = String(input || '');
|
||||
for (let i = 0; i < text.length; i++) {
|
||||
hash ^= text.charCodeAt(i);
|
||||
hash = Math.imul(hash, 0x01000193) >>> 0;
|
||||
}
|
||||
return hash >>> 0;
|
||||
}
|
||||
|
||||
function compareDocKeys(a, b) {
|
||||
const ka = `${a?.type || ''}:${a?.id || ''}`;
|
||||
const kb = `${b?.type || ''}:${b?.id || ''}`;
|
||||
if (ka < kb) return -1;
|
||||
if (ka > kb) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
function computeFingerprintFromDocs(docs) {
|
||||
const normalizedDocs = Array.isArray(docs) ? [...docs].sort(compareDocKeys) : [];
|
||||
let hash = 0x811C9DC5;
|
||||
|
||||
for (const doc of normalizedDocs) {
|
||||
const payload = `${doc?.type || ''}\u001F${doc?.id || ''}\u001F${doc?.floor ?? ''}\u001F${doc?.text || ''}\u001E`;
|
||||
hash = fnv1a32(payload, hash);
|
||||
}
|
||||
|
||||
return `${normalizedDocs.length}:${(hash >>> 0).toString(16)}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* 让出主线程(避免长时间阻塞 UI)
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
function yieldToMain() {
|
||||
return new Promise(resolve => setTimeout(resolve, 0));
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 文档收集
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
function clamp(v, min, max) {
|
||||
return Math.max(min, Math.min(max, v));
|
||||
}
|
||||
|
||||
function normalizeTerm(term) {
|
||||
return String(term || '').trim().toLowerCase();
|
||||
}
|
||||
|
||||
function computeIdfFromDf(df, docCount) {
|
||||
if (!docCount || docCount <= 0) return 1;
|
||||
const raw = Math.log((docCount + 1) / ((df || 0) + 1)) + 1;
|
||||
return clamp(raw, IDF_MIN, IDF_MAX);
|
||||
}
|
||||
|
||||
function computeIdf(term) {
|
||||
const t = normalizeTerm(term);
|
||||
if (!t || lexicalDocCount <= 0) return 1;
|
||||
return computeIdfFromDf(termDfMap.get(t) || 0, lexicalDocCount);
|
||||
}
|
||||
|
||||
function extractUniqueTokens(text) {
|
||||
return new Set(tokenizeForIndex(String(text || '')).map(normalizeTerm).filter(Boolean));
|
||||
}
|
||||
|
||||
function clearIdfState() {
|
||||
termDfMap = new Map();
|
||||
docTokenSets = new Map();
|
||||
lexicalDocCount = 0;
|
||||
}
|
||||
|
||||
function removeDocumentIdf(docId) {
|
||||
const id = String(docId || '');
|
||||
if (!id) return;
|
||||
|
||||
const tokens = docTokenSets.get(id);
|
||||
if (!tokens) return;
|
||||
|
||||
for (const token of tokens) {
|
||||
const current = termDfMap.get(token) || 0;
|
||||
if (current <= 1) {
|
||||
termDfMap.delete(token);
|
||||
} else {
|
||||
termDfMap.set(token, current - 1);
|
||||
}
|
||||
}
|
||||
|
||||
docTokenSets.delete(id);
|
||||
lexicalDocCount = Math.max(0, lexicalDocCount - 1);
|
||||
}
|
||||
|
||||
function addDocumentIdf(docId, text) {
|
||||
const id = String(docId || '');
|
||||
if (!id) return;
|
||||
|
||||
// Replace semantics: remove old token set first if this id already exists.
|
||||
removeDocumentIdf(id);
|
||||
|
||||
const tokens = extractUniqueTokens(text);
|
||||
docTokenSets.set(id, tokens);
|
||||
lexicalDocCount += 1;
|
||||
|
||||
for (const token of tokens) {
|
||||
termDfMap.set(token, (termDfMap.get(token) || 0) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
function rebuildIdfFromDocs(docs) {
|
||||
clearIdfState();
|
||||
for (const doc of docs || []) {
|
||||
const id = String(doc?.id || '');
|
||||
const text = String(doc?.text || '');
|
||||
if (!id || !text.trim()) continue;
|
||||
addDocumentIdf(id, text);
|
||||
}
|
||||
}
|
||||
|
||||
function buildEventDoc(ev) {
|
||||
if (!ev?.id) return null;
|
||||
|
||||
const parts = [];
|
||||
if (ev.title) parts.push(ev.title);
|
||||
if (ev.participants?.length) parts.push(ev.participants.join(' '));
|
||||
|
||||
const summary = cleanSummary(ev.summary);
|
||||
if (summary) parts.push(summary);
|
||||
|
||||
const text = parts.join(' ').trim();
|
||||
if (!text) return null;
|
||||
|
||||
return {
|
||||
id: ev.id,
|
||||
type: 'event',
|
||||
floor: null,
|
||||
text,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 收集所有待索引文档
|
||||
*
|
||||
* @param {object[]} chunks - getAllChunks(chatId) 返回值
|
||||
* @param {object[]} events - store.json.events
|
||||
* @returns {object[]} 文档数组
|
||||
*/
|
||||
function collectDocuments(chunks, events) {
|
||||
const docs = [];
|
||||
|
||||
// L1 chunks + 填充 floorDocIds
|
||||
for (const chunk of (chunks || [])) {
|
||||
for (const chunk of chunks || []) {
|
||||
if (!chunk?.chunkId || !chunk.text) continue;
|
||||
|
||||
const floor = chunk.floor ?? -1;
|
||||
@@ -101,48 +178,19 @@ function collectDocuments(chunks, events) {
|
||||
});
|
||||
|
||||
if (floor >= 0) {
|
||||
if (!floorDocIds.has(floor)) {
|
||||
floorDocIds.set(floor, []);
|
||||
}
|
||||
if (!floorDocIds.has(floor)) floorDocIds.set(floor, []);
|
||||
floorDocIds.get(floor).push(chunk.chunkId);
|
||||
}
|
||||
}
|
||||
|
||||
// L2 events
|
||||
for (const ev of (events || [])) {
|
||||
if (!ev?.id) continue;
|
||||
const parts = [];
|
||||
if (ev.title) parts.push(ev.title);
|
||||
if (ev.participants?.length) parts.push(ev.participants.join(' '));
|
||||
const summary = cleanSummary(ev.summary);
|
||||
if (summary) parts.push(summary);
|
||||
const text = parts.join(' ').trim();
|
||||
if (!text) continue;
|
||||
|
||||
docs.push({
|
||||
id: ev.id,
|
||||
type: 'event',
|
||||
floor: null,
|
||||
text,
|
||||
});
|
||||
for (const ev of events || []) {
|
||||
const doc = buildEventDoc(ev);
|
||||
if (doc) docs.push(doc);
|
||||
}
|
||||
|
||||
return docs;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 索引构建(分片,不阻塞主线程)
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** 每批添加的文档数 */
|
||||
const BUILD_BATCH_SIZE = 500;
|
||||
|
||||
/**
|
||||
* 构建 MiniSearch 索引(分片异步)
|
||||
*
|
||||
* @param {object[]} docs - 文档数组
|
||||
* @returns {Promise<MiniSearch>}
|
||||
*/
|
||||
async function buildIndexAsync(docs) {
|
||||
const T0 = performance.now();
|
||||
|
||||
@@ -158,49 +206,46 @@ async function buildIndexAsync(docs) {
|
||||
tokenize: tokenizeForIndex,
|
||||
});
|
||||
|
||||
if (!docs.length) {
|
||||
return index;
|
||||
}
|
||||
if (!docs.length) return index;
|
||||
|
||||
// 分片添加,每批 BUILD_BATCH_SIZE 条后让出主线程
|
||||
for (let i = 0; i < docs.length; i += BUILD_BATCH_SIZE) {
|
||||
const batch = docs.slice(i, i + BUILD_BATCH_SIZE);
|
||||
index.addAll(batch);
|
||||
|
||||
// 非最后一批时让出主线程
|
||||
if (i + BUILD_BATCH_SIZE < docs.length) {
|
||||
await yieldToMain();
|
||||
}
|
||||
}
|
||||
|
||||
const elapsed = Math.round(performance.now() - T0);
|
||||
xbLog.info(MODULE_ID,
|
||||
`索引构建完成: ${docs.length} 文档 (${elapsed}ms)`
|
||||
);
|
||||
|
||||
xbLog.info(MODULE_ID, `Index built: ${docs.length} docs (${elapsed}ms)`);
|
||||
return index;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 检索
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* @typedef {object} LexicalSearchResult
|
||||
* @property {string[]} atomIds - 命中的 L0 atom IDs
|
||||
* @property {Set<number>} atomFloors - 命中的 L0 楼层集合
|
||||
* @property {string[]} chunkIds - 命中的 L1 chunk IDs
|
||||
* @property {Set<number>} chunkFloors - 命中的 L1 楼层集合
|
||||
* @property {string[]} eventIds - 命中的 L2 event IDs
|
||||
* @property {object[]} chunkScores - chunk 命中详情 [{ chunkId, score }]
|
||||
* @property {number} searchTime - 检索耗时 ms
|
||||
* @property {string[]} atomIds - Reserved for backward compatibility (currently empty).
|
||||
* @property {Set<number>} atomFloors - Reserved for backward compatibility (currently empty).
|
||||
* @property {string[]} chunkIds - Matched L1 chunk ids sorted by weighted lexical score.
|
||||
* @property {Set<number>} chunkFloors - Floor ids covered by matched chunks.
|
||||
* @property {string[]} eventIds - Matched L2 event ids sorted by weighted lexical score.
|
||||
* @property {object[]} chunkScores - Weighted lexical scores for matched chunks.
|
||||
* @property {boolean} idfEnabled - Whether IDF stats are available for weighting.
|
||||
* @property {number} idfDocCount - Number of lexical docs used to compute IDF.
|
||||
* @property {Array<{term:string,idf:number}>} topIdfTerms - Top query terms by IDF.
|
||||
* @property {string[]} queryTerms - Normalized query terms actually searched.
|
||||
* @property {Record<string, Array<{floor:number, weightedScore:number, chunkId:string}>>} termFloorHits - Chunk-floor hits by term.
|
||||
* @property {Array<{floor:number, score:number, hitTermsCount:number}>} floorLexScores - Aggregated lexical floor scores (debug).
|
||||
* @property {number} termSearches - Number of per-term MiniSearch queries executed.
|
||||
* @property {number} searchTime - Total lexical search time in milliseconds.
|
||||
*/
|
||||
|
||||
/**
|
||||
* 在词法索引中检索
|
||||
* Search lexical index by terms, using per-term MiniSearch and IDF-weighted score aggregation.
|
||||
* This keeps existing outputs compatible while adding observability fields.
|
||||
*
|
||||
* @param {MiniSearch} index - 索引实例
|
||||
* @param {string[]} terms - 查询词列表
|
||||
* @param {MiniSearch} index
|
||||
* @param {string[]} terms
|
||||
* @returns {LexicalSearchResult}
|
||||
*/
|
||||
export function searchLexicalIndex(index, terms) {
|
||||
@@ -213,6 +258,13 @@ export function searchLexicalIndex(index, terms) {
|
||||
chunkFloors: new Set(),
|
||||
eventIds: [],
|
||||
chunkScores: [],
|
||||
idfEnabled: lexicalDocCount > 0,
|
||||
idfDocCount: lexicalDocCount,
|
||||
topIdfTerms: [],
|
||||
queryTerms: [],
|
||||
termFloorHits: {},
|
||||
floorLexScores: [],
|
||||
termSearches: 0,
|
||||
searchTime: 0,
|
||||
};
|
||||
|
||||
@@ -221,79 +273,111 @@ export function searchLexicalIndex(index, terms) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// 用所有 terms 联合查询
|
||||
const queryString = terms.join(' ');
|
||||
const queryTerms = Array.from(new Set((terms || []).map(normalizeTerm).filter(Boolean)));
|
||||
result.queryTerms = [...queryTerms];
|
||||
const weightedScores = new Map(); // docId -> score
|
||||
const hitMeta = new Map(); // docId -> { type, floor }
|
||||
const idfPairs = [];
|
||||
const termFloorHits = new Map(); // term -> [{ floor, weightedScore, chunkId }]
|
||||
const floorLexAgg = new Map(); // floor -> { score, terms:Set<string> }
|
||||
|
||||
let hits;
|
||||
try {
|
||||
hits = index.search(queryString, {
|
||||
boost: { text: 1 },
|
||||
fuzzy: 0.2,
|
||||
prefix: true,
|
||||
combineWith: 'OR',
|
||||
// 使用与索引相同的分词器
|
||||
tokenize: tokenizeForIndex,
|
||||
});
|
||||
} catch (e) {
|
||||
xbLog.warn(MODULE_ID, '检索失败', e);
|
||||
result.searchTime = Math.round(performance.now() - T0);
|
||||
return result;
|
||||
for (const term of queryTerms) {
|
||||
const idf = computeIdf(term);
|
||||
idfPairs.push({ term, idf });
|
||||
|
||||
let hits = [];
|
||||
try {
|
||||
hits = index.search(term, {
|
||||
boost: { text: 1 },
|
||||
fuzzy: 0.2,
|
||||
prefix: true,
|
||||
combineWith: 'OR',
|
||||
tokenize: tokenizeForIndex,
|
||||
});
|
||||
} catch (e) {
|
||||
xbLog.warn(MODULE_ID, `Lexical term search failed: ${term}`, e);
|
||||
continue;
|
||||
}
|
||||
|
||||
result.termSearches += 1;
|
||||
|
||||
for (const hit of hits) {
|
||||
const id = String(hit.id || '');
|
||||
if (!id) continue;
|
||||
|
||||
const weighted = (hit.score || 0) * idf;
|
||||
weightedScores.set(id, (weightedScores.get(id) || 0) + weighted);
|
||||
|
||||
if (!hitMeta.has(id)) {
|
||||
hitMeta.set(id, {
|
||||
type: hit.type,
|
||||
floor: hit.floor,
|
||||
});
|
||||
}
|
||||
|
||||
if (hit.type === 'chunk' && typeof hit.floor === 'number' && hit.floor >= 0) {
|
||||
if (!termFloorHits.has(term)) termFloorHits.set(term, []);
|
||||
termFloorHits.get(term).push({
|
||||
floor: hit.floor,
|
||||
weightedScore: weighted,
|
||||
chunkId: id,
|
||||
});
|
||||
|
||||
const floorAgg = floorLexAgg.get(hit.floor) || { score: 0, terms: new Set() };
|
||||
floorAgg.score += weighted;
|
||||
floorAgg.terms.add(term);
|
||||
floorLexAgg.set(hit.floor, floorAgg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 分类结果
|
||||
const chunkIdSet = new Set();
|
||||
const eventIdSet = new Set();
|
||||
idfPairs.sort((a, b) => b.idf - a.idf);
|
||||
result.topIdfTerms = idfPairs.slice(0, 5);
|
||||
result.termFloorHits = Object.fromEntries(
|
||||
[...termFloorHits.entries()].map(([term, hits]) => [term, hits]),
|
||||
);
|
||||
result.floorLexScores = [...floorLexAgg.entries()]
|
||||
.map(([floor, info]) => ({
|
||||
floor,
|
||||
score: Number(info.score.toFixed(6)),
|
||||
hitTermsCount: info.terms.size,
|
||||
}))
|
||||
.sort((a, b) => b.score - a.score);
|
||||
|
||||
for (const hit of hits) {
|
||||
const type = hit.type;
|
||||
const id = hit.id;
|
||||
const floor = hit.floor;
|
||||
const sortedHits = Array.from(weightedScores.entries())
|
||||
.sort((a, b) => b[1] - a[1]);
|
||||
|
||||
switch (type) {
|
||||
case 'chunk':
|
||||
if (!chunkIdSet.has(id)) {
|
||||
chunkIdSet.add(id);
|
||||
result.chunkIds.push(id);
|
||||
result.chunkScores.push({ chunkId: id, score: hit.score });
|
||||
if (typeof floor === 'number' && floor >= 0) {
|
||||
result.chunkFloors.add(floor);
|
||||
}
|
||||
}
|
||||
break;
|
||||
for (const [id, score] of sortedHits) {
|
||||
const meta = hitMeta.get(id);
|
||||
if (!meta) continue;
|
||||
|
||||
case 'event':
|
||||
if (!eventIdSet.has(id)) {
|
||||
eventIdSet.add(id);
|
||||
result.eventIds.push(id);
|
||||
}
|
||||
break;
|
||||
if (meta.type === 'chunk') {
|
||||
result.chunkIds.push(id);
|
||||
result.chunkScores.push({ chunkId: id, score });
|
||||
if (typeof meta.floor === 'number' && meta.floor >= 0) {
|
||||
result.chunkFloors.add(meta.floor);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (meta.type === 'event') {
|
||||
result.eventIds.push(id);
|
||||
}
|
||||
}
|
||||
|
||||
result.searchTime = Math.round(performance.now() - T0);
|
||||
|
||||
xbLog.info(MODULE_ID,
|
||||
`检索完成: terms=[${terms.slice(0, 5).join(',')}] → atoms=${result.atomIds.length} chunks=${result.chunkIds.length} events=${result.eventIds.length} (${result.searchTime}ms)`
|
||||
xbLog.info(
|
||||
MODULE_ID,
|
||||
`Lexical search terms=[${queryTerms.slice(0, 5).join(',')}] chunks=${result.chunkIds.length} events=${result.eventIds.length} termSearches=${result.termSearches} (${result.searchTime}ms)`,
|
||||
);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 内部构建流程(收集数据 + 构建索引)
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 收集数据并构建索引
|
||||
*
|
||||
* @param {string} chatId
|
||||
* @returns {Promise<{index: MiniSearch, fingerprint: string}>}
|
||||
*/
|
||||
async function collectAndBuild(chatId) {
|
||||
// 清空侧索引(全量重建)
|
||||
floorDocIds = new Map();
|
||||
|
||||
// 收集数据(不含 L0 atoms)
|
||||
const store = getSummaryStore();
|
||||
const events = store?.json?.events || [];
|
||||
|
||||
@@ -301,48 +385,44 @@ async function collectAndBuild(chatId) {
|
||||
try {
|
||||
chunks = await getAllChunks(chatId);
|
||||
} catch (e) {
|
||||
xbLog.warn(MODULE_ID, '获取 chunks 失败', e);
|
||||
xbLog.warn(MODULE_ID, 'Failed to load chunks', e);
|
||||
}
|
||||
|
||||
const fp = computeFingerprint(chunks.length, events.length);
|
||||
const docs = collectDocuments(chunks, events);
|
||||
const fp = computeFingerprintFromDocs(docs);
|
||||
|
||||
// 检查是否在收集过程中缓存已被其他调用更新
|
||||
if (cachedIndex && cachedChatId === chatId && cachedFingerprint === fp) {
|
||||
return { index: cachedIndex, fingerprint: fp };
|
||||
}
|
||||
|
||||
// 收集文档(同时填充 floorDocIds)
|
||||
const docs = collectDocuments(chunks, events);
|
||||
|
||||
// 异步分片构建
|
||||
rebuildIdfFromDocs(docs);
|
||||
const index = await buildIndexAsync(docs);
|
||||
|
||||
return { index, fingerprint: fp };
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 公开接口:getLexicalIndex(惰性获取)
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 获取词法索引(惰性构建 + 缓存)
|
||||
*
|
||||
* 如果缓存有效则直接返回;否则自动构建。
|
||||
* 如果正在构建中,等待构建完成。
|
||||
*
|
||||
* @returns {Promise<MiniSearch|null>}
|
||||
* Expose IDF accessor for query-term selection in query-builder.
|
||||
* If index stats are not ready, this gracefully falls back to idf=1.
|
||||
*/
|
||||
export function getLexicalIdfAccessor() {
|
||||
return {
|
||||
enabled: lexicalDocCount > 0,
|
||||
docCount: lexicalDocCount,
|
||||
getIdf(term) {
|
||||
return computeIdf(term);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
export async function getLexicalIndex() {
|
||||
const { chatId } = getContext();
|
||||
if (!chatId) return null;
|
||||
|
||||
// 快速路径:如果缓存存在且 chatId 未变,则直接命中
|
||||
// 指纹校验放到构建流程中完成,避免为指纹而额外读一次 IndexedDB
|
||||
if (cachedIndex && cachedChatId === chatId && cachedFingerprint) {
|
||||
return cachedIndex;
|
||||
}
|
||||
|
||||
// 正在构建中,等待结果
|
||||
if (building && buildPromise) {
|
||||
try {
|
||||
await buildPromise;
|
||||
@@ -350,27 +430,23 @@ export async function getLexicalIndex() {
|
||||
return cachedIndex;
|
||||
}
|
||||
} catch {
|
||||
// 构建失败,继续往下重建
|
||||
// Continue to rebuild below.
|
||||
}
|
||||
}
|
||||
|
||||
// 需要重建(指纹将在 collectAndBuild 内部计算并写入缓存)
|
||||
xbLog.info(MODULE_ID, `缓存失效,重建索引 (chatId=${chatId.slice(0, 8)})`);
|
||||
xbLog.info(MODULE_ID, `Lexical cache miss; rebuilding (chatId=${chatId.slice(0, 8)})`);
|
||||
|
||||
building = true;
|
||||
buildPromise = collectAndBuild(chatId);
|
||||
|
||||
try {
|
||||
const { index, fingerprint } = await buildPromise;
|
||||
|
||||
// 原子替换缓存
|
||||
cachedIndex = index;
|
||||
cachedChatId = chatId;
|
||||
cachedFingerprint = fingerprint;
|
||||
|
||||
return index;
|
||||
} catch (e) {
|
||||
xbLog.error(MODULE_ID, '索引构建失败', e);
|
||||
xbLog.error(MODULE_ID, 'Index build failed', e);
|
||||
return null;
|
||||
} finally {
|
||||
building = false;
|
||||
@@ -378,74 +454,29 @@ export async function getLexicalIndex() {
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 公开接口:warmupIndex(异步预建)
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 异步预建索引
|
||||
*
|
||||
* 在 CHAT_CHANGED 时调用,后台构建索引。
|
||||
* 不阻塞调用方,不返回结果。
|
||||
* 构建完成后缓存自动更新,后续 getLexicalIndex() 直接命中。
|
||||
*
|
||||
* 调用时机:
|
||||
* - handleChatChanged(实体注入后)
|
||||
* - L0 提取完成
|
||||
* - L2 总结完成
|
||||
*/
|
||||
export function warmupIndex() {
|
||||
const { chatId } = getContext();
|
||||
if (!chatId) return;
|
||||
if (!chatId || building) return;
|
||||
|
||||
// 已在构建中,不重复触发
|
||||
if (building) return;
|
||||
|
||||
// fire-and-forget
|
||||
getLexicalIndex().catch(e => {
|
||||
xbLog.warn(MODULE_ID, '预热索引失败', e);
|
||||
xbLog.warn(MODULE_ID, 'Warmup failed', e);
|
||||
});
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 公开接口:invalidateLexicalIndex(缓存失效)
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 使缓存失效(下次 getLexicalIndex / warmupIndex 时自动重建)
|
||||
*
|
||||
* 调用时机:
|
||||
* - CHAT_CHANGED
|
||||
* - L0 提取完成
|
||||
* - L2 总结完成
|
||||
*/
|
||||
export function invalidateLexicalIndex() {
|
||||
if (cachedIndex) {
|
||||
xbLog.info(MODULE_ID, '索引缓存已失效');
|
||||
xbLog.info(MODULE_ID, 'Lexical index cache invalidated');
|
||||
}
|
||||
cachedIndex = null;
|
||||
cachedChatId = null;
|
||||
cachedFingerprint = null;
|
||||
floorDocIds = new Map();
|
||||
clearIdfState();
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 增量更新接口
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 为指定楼层添加 L1 chunks 到索引
|
||||
*
|
||||
* 先移除该楼层旧文档,再添加新文档。
|
||||
* 如果索引不存在(缓存失效),静默跳过(下次 getLexicalIndex 全量重建)。
|
||||
*
|
||||
* @param {number} floor - 楼层号
|
||||
* @param {object[]} chunks - chunk 对象列表(需有 chunkId、text、floor)
|
||||
*/
|
||||
export function addDocumentsForFloor(floor, chunks) {
|
||||
if (!cachedIndex || !chunks?.length) return;
|
||||
|
||||
// 先移除旧文档
|
||||
removeDocumentsByFloor(floor);
|
||||
|
||||
const docs = [];
|
||||
@@ -453,30 +484,29 @@ export function addDocumentsForFloor(floor, chunks) {
|
||||
|
||||
for (const chunk of chunks) {
|
||||
if (!chunk?.chunkId || !chunk.text) continue;
|
||||
docs.push({
|
||||
|
||||
const doc = {
|
||||
id: chunk.chunkId,
|
||||
type: 'chunk',
|
||||
floor: chunk.floor ?? floor,
|
||||
text: chunk.text,
|
||||
});
|
||||
};
|
||||
docs.push(doc);
|
||||
docIds.push(chunk.chunkId);
|
||||
}
|
||||
|
||||
if (docs.length > 0) {
|
||||
cachedIndex.addAll(docs);
|
||||
floorDocIds.set(floor, docIds);
|
||||
xbLog.info(MODULE_ID, `增量添加: floor ${floor}, ${docs.length} 个 chunk`);
|
||||
if (!docs.length) return;
|
||||
|
||||
cachedIndex.addAll(docs);
|
||||
floorDocIds.set(floor, docIds);
|
||||
|
||||
for (const doc of docs) {
|
||||
addDocumentIdf(doc.id, doc.text);
|
||||
}
|
||||
|
||||
xbLog.info(MODULE_ID, `Incremental add floor=${floor} chunks=${docs.length}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* 从索引中移除指定楼层的所有 L1 chunk 文档
|
||||
*
|
||||
* 使用 MiniSearch discard()(软删除)。
|
||||
* 如果索引不存在,静默跳过。
|
||||
*
|
||||
* @param {number} floor - 楼层号
|
||||
*/
|
||||
export function removeDocumentsByFloor(floor) {
|
||||
if (!cachedIndex) return;
|
||||
|
||||
@@ -487,55 +517,39 @@ export function removeDocumentsByFloor(floor) {
|
||||
try {
|
||||
cachedIndex.discard(id);
|
||||
} catch {
|
||||
// 文档可能不存在(已被全量重建替换)
|
||||
// Ignore if the doc was already removed/rebuilt.
|
||||
}
|
||||
removeDocumentIdf(id);
|
||||
}
|
||||
|
||||
floorDocIds.delete(floor);
|
||||
xbLog.info(MODULE_ID, `增量移除: floor ${floor}, ${docIds.length} 个文档`);
|
||||
xbLog.info(MODULE_ID, `Incremental remove floor=${floor} chunks=${docIds.length}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* 将新 L2 事件添加到索引
|
||||
*
|
||||
* 如果事件 ID 已存在,先 discard 再 add(覆盖)。
|
||||
* 如果索引不存在,静默跳过。
|
||||
*
|
||||
* @param {object[]} events - 事件对象列表(需有 id、title、summary 等)
|
||||
*/
|
||||
export function addEventDocuments(events) {
|
||||
if (!cachedIndex || !events?.length) return;
|
||||
|
||||
const docs = [];
|
||||
|
||||
for (const ev of events) {
|
||||
if (!ev?.id) continue;
|
||||
const doc = buildEventDoc(ev);
|
||||
if (!doc) continue;
|
||||
|
||||
const parts = [];
|
||||
if (ev.title) parts.push(ev.title);
|
||||
if (ev.participants?.length) parts.push(ev.participants.join(' '));
|
||||
const summary = cleanSummary(ev.summary);
|
||||
if (summary) parts.push(summary);
|
||||
const text = parts.join(' ').trim();
|
||||
if (!text) continue;
|
||||
|
||||
// 覆盖:先尝试移除旧的
|
||||
try {
|
||||
cachedIndex.discard(ev.id);
|
||||
cachedIndex.discard(doc.id);
|
||||
} catch {
|
||||
// 不存在则忽略
|
||||
// Ignore if previous document does not exist.
|
||||
}
|
||||
|
||||
docs.push({
|
||||
id: ev.id,
|
||||
type: 'event',
|
||||
floor: null,
|
||||
text,
|
||||
});
|
||||
removeDocumentIdf(doc.id);
|
||||
docs.push(doc);
|
||||
}
|
||||
|
||||
if (docs.length > 0) {
|
||||
cachedIndex.addAll(docs);
|
||||
xbLog.info(MODULE_ID, `增量添加: ${docs.length} 个事件`);
|
||||
if (!docs.length) return;
|
||||
|
||||
cachedIndex.addAll(docs);
|
||||
for (const doc of docs) {
|
||||
addDocumentIdf(doc.id, doc.text);
|
||||
}
|
||||
|
||||
xbLog.info(MODULE_ID, `Incremental add events=${docs.length}`);
|
||||
}
|
||||
|
||||
@@ -52,6 +52,10 @@ export function createMetrics() {
|
||||
eventHits: 0,
|
||||
searchTime: 0,
|
||||
indexReadyTime: 0,
|
||||
idfEnabled: false,
|
||||
idfDocCount: 0,
|
||||
topIdfTerms: [],
|
||||
termSearches: 0,
|
||||
eventFilteredByDense: 0,
|
||||
floorFilteredByDense: 0,
|
||||
},
|
||||
@@ -97,6 +101,11 @@ export function createMetrics() {
|
||||
floorCandidates: 0,
|
||||
floorsSelected: 0,
|
||||
l0Collected: 0,
|
||||
mustKeepTermsCount: 0,
|
||||
mustKeepFloorsCount: 0,
|
||||
mustKeepFloors: [],
|
||||
droppedByRerankCount: 0,
|
||||
lexHitButNotSelected: 0,
|
||||
rerankApplied: false,
|
||||
rerankFailed: false,
|
||||
beforeRerank: 0,
|
||||
@@ -274,6 +283,20 @@ export function formatMetricsLog(metrics) {
|
||||
if (m.lexical.indexReadyTime > 0) {
|
||||
lines.push(`├─ index_ready_time: ${m.lexical.indexReadyTime}ms`);
|
||||
}
|
||||
lines.push(`├─ idf_enabled: ${!!m.lexical.idfEnabled}`);
|
||||
if (m.lexical.idfDocCount > 0) {
|
||||
lines.push(`├─ idf_doc_count: ${m.lexical.idfDocCount}`);
|
||||
}
|
||||
if ((m.lexical.topIdfTerms || []).length > 0) {
|
||||
const topIdfText = m.lexical.topIdfTerms
|
||||
.slice(0, 5)
|
||||
.map(x => `${x.term}:${x.idf}`)
|
||||
.join(', ');
|
||||
lines.push(`├─ top_idf_terms: [${topIdfText}]`);
|
||||
}
|
||||
if (m.lexical.termSearches > 0) {
|
||||
lines.push(`├─ term_searches: ${m.lexical.termSearches}`);
|
||||
}
|
||||
if (m.lexical.eventFilteredByDense > 0) {
|
||||
lines.push(`├─ event_filtered_by_dense: ${m.lexical.eventFilteredByDense}`);
|
||||
}
|
||||
@@ -295,6 +318,20 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push(`└─ time: ${m.fusion.time}ms`);
|
||||
lines.push('');
|
||||
|
||||
// Fusion Guard (must-keep lexical floors)
|
||||
lines.push('[Fusion Guard] Lexical Must-Keep');
|
||||
lines.push(`├─ must_keep_terms: ${m.evidence.mustKeepTermsCount || 0}`);
|
||||
lines.push(`├─ must_keep_floors: ${m.evidence.mustKeepFloorsCount || 0}`);
|
||||
if ((m.evidence.mustKeepFloors || []).length > 0) {
|
||||
lines.push(`│ └─ floors: [${m.evidence.mustKeepFloors.slice(0, 10).join(', ')}]`);
|
||||
}
|
||||
if ((m.evidence.lexHitButNotSelected || 0) > 0) {
|
||||
lines.push(`└─ lex_hit_but_not_selected: ${m.evidence.lexHitButNotSelected}`);
|
||||
} else {
|
||||
lines.push(`└─ lex_hit_but_not_selected: 0`);
|
||||
}
|
||||
lines.push('');
|
||||
|
||||
// Constraint (L3 Facts)
|
||||
lines.push('[Constraint] L3 Facts - 世界约束');
|
||||
lines.push(`├─ total: ${m.constraint.total}`);
|
||||
@@ -358,6 +395,9 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push(`│ │ ├─ before: ${m.evidence.beforeRerank} floors`);
|
||||
lines.push(`│ │ ├─ after: ${m.evidence.afterRerank} floors`);
|
||||
lines.push(`│ │ └─ time: ${m.evidence.rerankTime}ms`);
|
||||
if ((m.evidence.droppedByRerankCount || 0) > 0) {
|
||||
lines.push(`│ ├─ dropped_normal: ${m.evidence.droppedByRerankCount}`);
|
||||
}
|
||||
if (m.evidence.rerankScores) {
|
||||
const rs = m.evidence.rerankScores;
|
||||
lines.push(`│ ├─ rerank_scores: min=${rs.min}, max=${rs.max}, mean=${rs.mean}`);
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { buildEntityLexicon, buildDisplayNameMap, extractEntitiesFromText, buildCharacterPools } from './entity-lexicon.js';
|
||||
import { getLexicalIdfAccessor } from './lexical-index.js';
|
||||
import { getSummaryStore } from '../../data/store.js';
|
||||
import { filterText } from '../utils/text-filter.js';
|
||||
import { tokenizeForIndex as tokenizerTokenizeForIndex } from '../utils/tokenizer.js';
|
||||
@@ -106,6 +107,7 @@ export function computeLengthFactor(charCount) {
|
||||
function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
|
||||
if (!text) return [];
|
||||
|
||||
const idfAccessor = getLexicalIdfAccessor();
|
||||
const tokens = tokenizerTokenizeForIndex(text);
|
||||
const freq = new Map();
|
||||
for (const token of tokens) {
|
||||
@@ -115,9 +117,13 @@ function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
|
||||
}
|
||||
|
||||
return Array.from(freq.entries())
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.map(([term, tf]) => {
|
||||
const idf = idfAccessor.enabled ? idfAccessor.getIdf(term) : 1;
|
||||
return { term, tf, score: tf * idf };
|
||||
})
|
||||
.sort((a, b) => (b.score - a.score) || (b.tf - a.tf))
|
||||
.slice(0, maxTerms)
|
||||
.map(([term]) => term);
|
||||
.map(x => x.term);
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -42,6 +42,7 @@ import { getLexicalIndex, searchLexicalIndex } from './lexical-index.js';
|
||||
import { rerankChunks } from '../llm/reranker.js';
|
||||
import { createMetrics, calcSimilarityStats } from './metrics.js';
|
||||
import { diffuseFromSeeds } from './diffusion.js';
|
||||
import { tokenizeForIndex } from '../utils/tokenizer.js';
|
||||
|
||||
const MODULE_ID = 'recall';
|
||||
|
||||
@@ -81,6 +82,11 @@ const CONFIG = {
|
||||
RERANK_TOP_N: 20,
|
||||
RERANK_MIN_SCORE: 0.10,
|
||||
|
||||
// Fusion guard: lexical must-keep floors
|
||||
MUST_KEEP_MAX_FLOORS: 3,
|
||||
MUST_KEEP_MIN_IDF: 2.2,
|
||||
MUST_KEEP_CLUSTER_WINDOW: 2,
|
||||
|
||||
// 因果链
|
||||
CAUSAL_CHAIN_MAX_DEPTH: 10,
|
||||
CAUSAL_INJECT_MAX: 30,
|
||||
@@ -517,13 +523,107 @@ function fuseByFloor(denseRank, lexRank, cap = CONFIG.FUSION_CAP) {
|
||||
return { top: scored.slice(0, cap), totalUnique };
|
||||
}
|
||||
|
||||
/**
 * Map a chunk's floor index to the AI floor it belongs to.
 *
 * Non-USER floors are returned unchanged. A USER floor maps to the AI reply
 * immediately after it; when that reply does not exist (end of chat, or the
 * next message is another USER turn) the floor is dropped.
 *
 * @param {number|string} floor - Candidate floor index (coerced via Number).
 * @param {Array<{is_user?: boolean}>|null|undefined} chat - Chat message array.
 * @returns {number|null} The resolved AI floor, or null when unusable.
 */
function mapChunkFloorToAiFloor(floor, chat) {
    const base = Number(floor);
    if (!Number.isInteger(base) || base < 0) return null;

    // Non-user (or missing) entries pass through as-is.
    if (!chat?.[base]?.is_user) return base;

    // USER floor: promote to the following AI reply when one exists.
    const next = base + 1;
    const hasAiReply = next < (chat?.length || 0) && !chat?.[next]?.is_user;
    return hasAiReply ? next : null;
}
|
||||
|
||||
/**
 * Check whether a term survives the index tokenizer as-is, i.e. it is not a
 * stopword (or otherwise dropped/split) by tokenization.
 *
 * @param {string} term - Candidate term.
 * @returns {boolean} True when the normalized term reappears among its own
 *   normalized tokens.
 */
function isNonStopwordTerm(term) {
    const normalized = normalize(term);
    if (!normalized) return false;
    // Re-tokenize the normalized term: a stopword would be filtered out and
    // never come back unchanged.
    return tokenizeForIndex(normalized).some(t => normalize(t) === normalized);
}
|
||||
|
||||
/**
 * Fusion guard: pick a small set of "must keep" floors from lexical hits.
 *
 * High-IDF query terms that actually hit floors (and that map to AI floors
 * backed by L0 atoms) vote for their floors; floors are scored by summed
 * weighted hit scores with a mild multi-term coverage bonus, then selected
 * greedily with a distance-based clustering window and a hard cap.
 *
 * @param {object|null} lexicalResult - Lexical search result (topIdfTerms, termFloorHits).
 * @param {string[]} lexicalTerms - Query terms used for the lexical search.
 * @param {Set<number>} atomFloorSet - Floors that have extracted L0 atoms.
 * @param {Array} chat - Chat messages, used for USER→AI floor mapping.
 * @returns {{terms: Array, floors: Array, floorSet: Set<number>, lexHitButNotSelected: number}}
 */
function buildMustKeepFloors(lexicalResult, lexicalTerms, atomFloorSet, chat) {
    // Empty result shape shared by every early-exit path.
    const result = {
        terms: [],
        floors: [],
        floorSet: new Set(),
        lexHitButNotSelected: 0,
    };

    const hasInput = !!lexicalResult
        && (lexicalTerms?.length ?? 0) > 0
        && (atomFloorSet?.size ?? 0) > 0;
    if (!hasInput) return result;

    const queryTerms = new Set((lexicalTerms || []).map(normalize).filter(Boolean));

    // A term qualifies when it is a real (non-stopword, length>=2) query term,
    // rare enough (IDF threshold), and actually produced floor hits.
    const qualifies = (entry) => {
        const t = normalize(entry?.term);
        if (!t || t.length < 2) return false;
        if (!queryTerms.has(t)) return false;
        if (!isNonStopwordTerm(t)) return false;
        if ((entry?.idf || 0) < CONFIG.MUST_KEEP_MIN_IDF) return false;
        const floorHits = lexicalResult.termFloorHits?.[t];
        return Array.isArray(floorHits) && floorHits.length > 0;
    };

    const rankedTerms = (lexicalResult.topIdfTerms || [])
        .filter(qualifies)
        .sort((a, b) => (b.idf || 0) - (a.idf || 0));

    if (rankedTerms.length === 0) return result;

    result.terms = rankedTerms.map(x => ({ term: normalize(x.term), idf: x.idf || 0 }));

    // Aggregate weighted hit scores per AI floor; only floors with L0 atoms count.
    const scoreByFloor = new Map(); // floor -> { lexHitScore, terms: Set<string> }
    for (const { term } of result.terms) {
        for (const hit of (lexicalResult.termFloorHits?.[term] || [])) {
            const aiFloor = mapChunkFloorToAiFloor(hit.floor, chat);
            if (aiFloor == null || !atomFloorSet.has(aiFloor)) continue;

            let agg = scoreByFloor.get(aiFloor);
            if (!agg) {
                agg = { lexHitScore: 0, terms: new Set() };
                scoreByFloor.set(aiFloor, agg);
            }
            agg.lexHitScore += Number(hit?.weightedScore || 0);
            agg.terms.add(term);
        }
    }

    // Score each candidate floor; multi-term coverage earns +20% per extra term.
    const candidates = [];
    for (const [floor, agg] of scoreByFloor) {
        const coverage = agg.terms.size;
        candidates.push({
            floor,
            score: agg.lexHitScore * (1 + 0.2 * Math.max(0, coverage - 1)),
            termCoverage: coverage,
            terms: [...agg.terms],
        });
    }
    candidates.sort((a, b) => b.score - a.score);

    // Total lexical-hit candidates; the caller subtracts the selected count
    // to report how many hit floors were ultimately not kept.
    result.lexHitButNotSelected = candidates.length;

    // Greedy selection: keep the best-scored floor per cluster, skipping any
    // floor within MUST_KEEP_CLUSTER_WINDOW of an already-selected one.
    const picked = [];
    for (const cand of candidates) {
        const tooClose = picked.some(p => Math.abs(p.floor - cand.floor) <= CONFIG.MUST_KEEP_CLUSTER_WINDOW);
        if (tooClose) continue;
        picked.push(cand);
        if (picked.length >= CONFIG.MUST_KEEP_MAX_FLOORS) break;
    }

    result.floors = picked;
    result.floorSet = new Set(picked.map(x => x.floor));
    return result;
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// [Stage 6] Floor 融合 + Rerank
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexicalResult, metrics) {
|
||||
async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexicalResult, lexicalTerms, metrics) {
|
||||
const { chatId, chat, name1, name2 } = getContext();
|
||||
if (!chatId) return { l0Selected: [], l1ScoredByFloor: new Map() };
|
||||
if (!chatId) return { l0Selected: [], l1ScoredByFloor: new Map(), mustKeepFloors: [] };
|
||||
|
||||
const T_Start = performance.now();
|
||||
|
||||
@@ -558,17 +658,8 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
|
||||
for (const { chunkId, score } of (lexicalResult?.chunkScores || [])) {
|
||||
const match = chunkId?.match(/^c-(\d+)-/);
|
||||
if (!match) continue;
|
||||
let floor = parseInt(match[1], 10);
|
||||
|
||||
// USER floor → AI floor 映射
|
||||
if (chat?.[floor]?.is_user) {
|
||||
const aiFloor = floor + 1;
|
||||
if (aiFloor < chat.length && !chat[aiFloor]?.is_user) {
|
||||
floor = aiFloor;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
const floor = mapChunkFloorToAiFloor(parseInt(match[1], 10), chat);
|
||||
if (floor == null) continue;
|
||||
|
||||
// 预过滤:必须有 L0 atoms
|
||||
if (!atomFloorSet.has(floor)) continue;
|
||||
@@ -600,6 +691,12 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
|
||||
metrics.lexical.floorFilteredByDense = lexFloorFilteredByDense;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 6b.5 Fusion Guard: lexical must-keep floors
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
const mustKeep = buildMustKeepFloors(lexicalResult, lexicalTerms, atomFloorSet, chat);
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 6c. Floor W-RRF 融合
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
@@ -617,6 +714,10 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
|
||||
metrics.fusion.denseAggMethod = 'maxSim';
|
||||
metrics.fusion.lexDensityBonus = CONFIG.LEX_DENSITY_BONUS;
|
||||
metrics.evidence.floorCandidates = fusedFloors.length;
|
||||
metrics.evidence.mustKeepTermsCount = mustKeep.terms.length;
|
||||
metrics.evidence.mustKeepFloorsCount = mustKeep.floors.length;
|
||||
metrics.evidence.mustKeepFloors = mustKeep.floors.map(x => x.floor).slice(0, 10);
|
||||
metrics.evidence.lexHitButNotSelected = Math.max(0, mustKeep.lexHitButNotSelected - mustKeep.floors.length);
|
||||
}
|
||||
|
||||
if (fusedFloors.length === 0) {
|
||||
@@ -628,7 +729,7 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
|
||||
metrics.evidence.l1CosineTime = 0;
|
||||
metrics.evidence.rerankApplied = false;
|
||||
}
|
||||
return { l0Selected: [], l1ScoredByFloor: new Map() };
|
||||
return { l0Selected: [], l1ScoredByFloor: new Map(), mustKeepFloors: [] };
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
@@ -650,8 +751,10 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
|
||||
// 6e. 构建 rerank documents(每个 floor: USER chunks + AI chunks)
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
const normalFloors = fusedFloors.filter(f => !mustKeep.floorSet.has(f.id));
|
||||
|
||||
const rerankCandidates = [];
|
||||
for (const f of fusedFloors) {
|
||||
for (const f of normalFloors) {
|
||||
const aiFloor = f.id;
|
||||
const userFloor = aiFloor - 1;
|
||||
|
||||
@@ -698,6 +801,7 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
|
||||
metrics.evidence.rerankApplied = true;
|
||||
metrics.evidence.beforeRerank = rerankCandidates.length;
|
||||
metrics.evidence.afterRerank = reranked.length;
|
||||
metrics.evidence.droppedByRerankCount = Math.max(0, rerankCandidates.length - reranked.length);
|
||||
metrics.evidence.rerankFailed = reranked.some(c => c._rerankFailed);
|
||||
metrics.evidence.rerankTime = rerankTime;
|
||||
metrics.timing.evidenceRerank = rerankTime;
|
||||
@@ -722,9 +826,12 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
|
||||
// 6g. 收集 L0 atoms
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
// 仅保留“真实 dense 命中”的 L0 原子:
|
||||
// 旧逻辑按 floor 全塞,容易把同层无关原子带进来。
|
||||
const atomById = new Map(getStateAtoms().map(a => [a.atomId, a]));
|
||||
// Floor-based L0 collection:
|
||||
// once a floor is selected by fusion/rerank, L0 atoms come from that floor.
|
||||
// Dense anchor hits are used as similarity signals (ranking), not hard admission.
|
||||
const allAtoms = getStateAtoms();
|
||||
const atomById = new Map(allAtoms.map(a => [a.atomId, a]));
|
||||
const anchorSimilarityByAtomId = new Map((anchorHits || []).map(h => [h.atomId, h.similarity || 0]));
|
||||
const matchedAtomsByFloor = new Map();
|
||||
for (const hit of (anchorHits || [])) {
|
||||
const atom = hit.atom || atomById.get(hit.atomId);
|
||||
@@ -739,15 +846,42 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
|
||||
arr.sort((a, b) => b.similarity - a.similarity);
|
||||
}
|
||||
|
||||
const mustKeepMissing = mustKeep.floors
|
||||
.filter(mf => !reranked.some(r => r.floor === mf.floor))
|
||||
.map(mf => ({
|
||||
floor: mf.floor,
|
||||
_rerankScore: 0.12 + Math.min(0.05, 0.01 * (mf.termCoverage || 1)),
|
||||
_isMustKeep: true,
|
||||
}));
|
||||
|
||||
const finalFloorItems = [
|
||||
...reranked.map(r => ({ ...r, _isMustKeep: false })),
|
||||
...mustKeepMissing,
|
||||
];
|
||||
|
||||
const allAtomsByFloor = new Map();
|
||||
for (const atom of allAtoms) {
|
||||
const f = Number(atom?.floor);
|
||||
if (!Number.isInteger(f) || f < 0) continue;
|
||||
if (!allAtomsByFloor.has(f)) allAtomsByFloor.set(f, []);
|
||||
allAtomsByFloor.get(f).push(atom);
|
||||
}
|
||||
|
||||
const l0Selected = [];
|
||||
|
||||
for (const item of reranked) {
|
||||
for (const item of finalFloorItems) {
|
||||
const floor = item.floor;
|
||||
const rerankScore = item._rerankScore || 0;
|
||||
const rerankScore = Number.isFinite(item?._rerankScore) ? item._rerankScore : 0;
|
||||
|
||||
// 仅收集该 floor 中真实命中的 L0 atoms
|
||||
const floorMatchedAtoms = matchedAtomsByFloor.get(floor) || [];
|
||||
for (const { atom, similarity } of floorMatchedAtoms) {
|
||||
const floorAtoms = allAtomsByFloor.get(floor) || [];
|
||||
floorAtoms.sort((a, b) => {
|
||||
const sa = anchorSimilarityByAtomId.get(a.atomId) || 0;
|
||||
const sb = anchorSimilarityByAtomId.get(b.atomId) || 0;
|
||||
return sb - sa;
|
||||
});
|
||||
|
||||
for (const atom of floorAtoms) {
|
||||
const similarity = anchorSimilarityByAtomId.get(atom.atomId) || 0;
|
||||
l0Selected.push({
|
||||
id: `anchor-${atom.atomId}`,
|
||||
atomId: atom.atomId,
|
||||
@@ -762,7 +896,7 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
|
||||
}
|
||||
|
||||
if (metrics) {
|
||||
metrics.evidence.floorsSelected = reranked.length;
|
||||
metrics.evidence.floorsSelected = finalFloorItems.length;
|
||||
metrics.evidence.l0Collected = l0Selected.length;
|
||||
|
||||
metrics.evidence.l1Pulled = 0;
|
||||
@@ -777,10 +911,14 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
|
||||
}
|
||||
|
||||
xbLog.info(MODULE_ID,
|
||||
`Evidence: ${denseFloorRank.length} dense floors + ${lexFloorRank.length} lex floors (${lexFloorFilteredByDense} lex filtered by dense) → fusion=${fusedFloors.length} → rerank=${reranked.length} floors → L0=${l0Selected.length} (${totalTime}ms)`
|
||||
`Evidence: ${denseFloorRank.length} dense floors + ${lexFloorRank.length} lex floors (${lexFloorFilteredByDense} lex filtered by dense) → fusion=${fusedFloors.length} → rerank(normal)=${reranked.length} + mustKeep=${mustKeepMissing.length} floors → L0=${l0Selected.length} (${totalTime}ms)`
|
||||
);
|
||||
|
||||
return { l0Selected, l1ScoredByFloor };
|
||||
return {
|
||||
l0Selected,
|
||||
l1ScoredByFloor,
|
||||
mustKeepFloors: mustKeep.floors.map(x => x.floor),
|
||||
};
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
@@ -965,6 +1103,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
focusEntities: [],
|
||||
focusTerms: [],
|
||||
focusCharacters: [],
|
||||
mustKeepFloors: [],
|
||||
elapsed: metrics.timing.total,
|
||||
logText: 'No events.',
|
||||
metrics,
|
||||
@@ -984,6 +1123,12 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
: CONFIG.LAST_MESSAGES_K;
|
||||
const lastMessages = getLastMessages(chat, lastMessagesCount, excludeLastAi);
|
||||
|
||||
// Non-blocking preload: keep recall latency stable.
|
||||
// If not ready yet, query-builder will gracefully fall back to TF terms.
|
||||
getLexicalIndex().catch((e) => {
|
||||
xbLog.warn(MODULE_ID, 'Preload lexical index failed; continue with TF fallback', e);
|
||||
});
|
||||
|
||||
const bundle = buildQueryBundle(lastMessages, pendingUserMessage);
|
||||
const focusTerms = bundle.focusTerms || bundle.focusEntities || [];
|
||||
const focusCharacters = bundle.focusCharacters || [];
|
||||
@@ -1015,6 +1160,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
focusEntities: focusTerms,
|
||||
focusTerms,
|
||||
focusCharacters,
|
||||
mustKeepFloors: [],
|
||||
elapsed: metrics.timing.total,
|
||||
logText: 'No query segments.',
|
||||
metrics,
|
||||
@@ -1037,6 +1183,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
focusEntities: focusTerms,
|
||||
focusTerms,
|
||||
focusCharacters,
|
||||
mustKeepFloors: [],
|
||||
elapsed: metrics.timing.total,
|
||||
logText: 'Embedding failed (round 1, after retry).',
|
||||
metrics,
|
||||
@@ -1051,6 +1198,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
focusEntities: focusTerms,
|
||||
focusTerms,
|
||||
focusCharacters,
|
||||
mustKeepFloors: [],
|
||||
elapsed: metrics.timing.total,
|
||||
logText: 'Empty query vectors (round 1).',
|
||||
metrics,
|
||||
@@ -1071,6 +1219,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
focusEntities: focusTerms,
|
||||
focusTerms,
|
||||
focusCharacters,
|
||||
mustKeepFloors: [],
|
||||
elapsed: metrics.timing.total,
|
||||
logText: 'Weighted average produced empty vector.',
|
||||
metrics,
|
||||
@@ -1161,6 +1310,10 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
atomIds: [], atomFloors: new Set(),
|
||||
chunkIds: [], chunkFloors: new Set(),
|
||||
eventIds: [], chunkScores: [], searchTime: 0,
|
||||
idfEnabled: false, idfDocCount: 0, topIdfTerms: [], termSearches: 0,
|
||||
queryTerms: [],
|
||||
termFloorHits: {},
|
||||
floorLexScores: [],
|
||||
};
|
||||
|
||||
let indexReadyTime = 0;
|
||||
@@ -1184,6 +1337,10 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
metrics.lexical.searchTime = lexicalResult.searchTime || 0;
|
||||
metrics.lexical.indexReadyTime = indexReadyTime;
|
||||
metrics.lexical.terms = bundle.lexicalTerms.slice(0, 10);
|
||||
metrics.lexical.idfEnabled = !!lexicalResult.idfEnabled;
|
||||
metrics.lexical.idfDocCount = lexicalResult.idfDocCount || 0;
|
||||
metrics.lexical.topIdfTerms = lexicalResult.topIdfTerms || [];
|
||||
metrics.lexical.termSearches = lexicalResult.termSearches || 0;
|
||||
}
|
||||
|
||||
// 合并 L2 events(lexical 命中但 dense 未命中的 events)
|
||||
@@ -1238,18 +1395,19 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
}
|
||||
|
||||
xbLog.info(MODULE_ID,
|
||||
`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} mergedEvents=+${lexicalEventCount} filteredByDense=${lexicalEventFilteredByDense} floorFiltered=${metrics.lexical.floorFilteredByDense || 0} (indexReady=${indexReadyTime}ms search=${lexicalResult.searchTime || 0}ms total=${lexTime}ms)`
|
||||
`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} mergedEvents=+${lexicalEventCount} filteredByDense=${lexicalEventFilteredByDense} floorFiltered=${metrics.lexical.floorFilteredByDense || 0} idfEnabled=${lexicalResult.idfEnabled ? 'yes' : 'no'} idfDocs=${lexicalResult.idfDocCount || 0} termSearches=${lexicalResult.termSearches || 0} (indexReady=${indexReadyTime}ms search=${lexicalResult.searchTime || 0}ms total=${lexTime}ms)`
|
||||
);
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// 阶段 6: Floor 粒度融合 + Rerank + L1 配对
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
const { l0Selected, l1ScoredByFloor } = await locateAndPullEvidence(
|
||||
const { l0Selected, l1ScoredByFloor, mustKeepFloors } = await locateAndPullEvidence(
|
||||
anchorHits,
|
||||
queryVector_v1,
|
||||
bundle.rerankQuery,
|
||||
lexicalResult,
|
||||
bundle.lexicalTerms,
|
||||
metrics
|
||||
);
|
||||
|
||||
@@ -1379,6 +1537,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
console.log(`Round 2 Anchors: ${anchorHits.length} hits → ${anchorFloors_dense.size} floors`);
|
||||
console.log(`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} evtMerged=+${lexicalEventCount} evtFiltered=${lexicalEventFilteredByDense} floorFiltered=${metrics.lexical.floorFilteredByDense || 0} (idx=${indexReadyTime}ms search=${lexicalResult.searchTime || 0}ms total=${lexTime}ms)`);
|
||||
console.log(`Fusion (floor, weighted): dense=${metrics.fusion.denseFloors} lex=${metrics.fusion.lexFloors} → cap=${metrics.fusion.afterCap} (${metrics.fusion.time}ms)`);
|
||||
console.log(`Fusion Guard: mustKeepTerms=${metrics.evidence.mustKeepTermsCount || 0} mustKeepFloors=[${(metrics.evidence.mustKeepFloors || []).join(', ')}]`);
|
||||
console.log(`Floor Rerank: ${metrics.evidence.beforeRerank || 0} → ${metrics.evidence.floorsSelected || 0} floors → L0=${metrics.evidence.l0Collected || 0} (${metrics.evidence.rerankTime || 0}ms)`);
|
||||
console.log(`L1: ${metrics.evidence.l1Pulled || 0} pulled → ${metrics.evidence.l1Attached || 0} attached (${metrics.evidence.l1CosineTime || 0}ms)`);
|
||||
console.log(`Events: ${eventHits.length} hits (l0Linked=+${l0LinkedCount}), ${causalChain.length} causal`);
|
||||
@@ -1393,6 +1552,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
|
||||
focusEntities: focusTerms,
|
||||
focusTerms,
|
||||
focusCharacters,
|
||||
mustKeepFloors: mustKeepFloors || [],
|
||||
elapsed: metrics.timing.total,
|
||||
metrics,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user