Fix lint warnings and update retrieval modules
This commit is contained in:
151
modules/story-summary/vector/retrieval/entity-lexicon.js
Normal file
151
modules/story-summary/vector/retrieval/entity-lexicon.js
Normal file
@@ -0,0 +1,151 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// entity-lexicon.js - 实体词典(确定性,无 LLM)
|
||||
//
|
||||
// 职责:
|
||||
// 1. 从已有结构化存储构建可信实体词典
|
||||
// 2. 从文本中提取命中的实体
|
||||
//
|
||||
// 硬约束:name1 永不进入词典
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
|
||||
* 标准化字符串(用于实体匹配)
|
||||
* @param {string} s
|
||||
* @returns {string}
|
||||
*/
|
||||
/**
 * Normalize a string for entity matching: NFKC-fold, strip zero-width
 * characters (ZWSP/ZWNJ/ZWJ/BOM), trim, and lowercase.
 * @param {string} s
 * @returns {string} the normalized string ('' for null/undefined input)
 */
function normalize(s) {
    const raw = String(s || '');
    const folded = raw.normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '');
    return folded.trim().toLowerCase();
}
|
||||
|
||||
/**
|
||||
* 构建实体词典
|
||||
*
|
||||
* 来源(按可信度):
|
||||
* 1. store.json.characters.main — 已确认主要角色
|
||||
* 2. store.json.arcs[].name — 弧光对象
|
||||
* 3. context.name2 — 当前角色
|
||||
* 4. store.json.facts[].s — L3 事实主语
|
||||
*
|
||||
* 硬约束:永远排除 normalize(context.name1)
|
||||
*
|
||||
* @param {object} store - getSummaryStore() 返回值
|
||||
* @param {object} context - { name1: string, name2: string }
|
||||
* @returns {Set<string>} 标准化后的实体集合
|
||||
*/
|
||||
/**
 * Build the trusted entity lexicon (deterministic, no LLM).
 *
 * Sources, in order of trust:
 *   1. store.json.characters.main — confirmed main characters
 *   2. store.json.arcs[].name    — arc subjects
 *   3. context.name2             — current character
 *   4. store.json.facts[].s     — L3 fact subjects (non-retracted)
 *
 * Hard constraint: normalize(context.name1) is always excluded.
 *
 * @param {object} store - return value of getSummaryStore()
 * @param {object} context - { name1: string, name2: string }
 * @returns {Set<string>} set of normalized entity names
 */
export function buildEntityLexicon(store, context) {
    const lexicon = new Set();

    // Add a candidate entity; drop empty and single-character names.
    const add = (raw) => {
        const n = normalize(raw);
        if (n && n.length >= 2) lexicon.add(n);
    };

    // 1. Main characters (entries may be plain strings or { name } objects;
    //    optional chaining guards against null/malformed entries).
    for (const m of (store?.json?.characters?.main || [])) {
        add(typeof m === 'string' ? m : m?.name);
    }

    // 2. Arc subjects (guard against null entries).
    for (const a of (store?.json?.arcs || [])) {
        add(a?.name);
    }

    // 3. Current character.
    if (context?.name2) {
        add(context.name2);
    }

    // 4. L3 fact subjects, skipping retracted facts.
    for (const f of (store?.json?.facts || [])) {
        if (f?.retracted) continue;
        add(f?.s);
    }

    // ★ Hard constraint: the player persona (name1) never enters the lexicon.
    if (context?.name1) {
        lexicon.delete(normalize(context.name1));
    }

    return lexicon;
}
|
||||
|
||||
/**
|
||||
* 构建"原词形 → 标准化"映射表
|
||||
* 用于从 lexicon 反查原始显示名
|
||||
*
|
||||
* @param {object} store
|
||||
* @param {object} context
|
||||
* @returns {Map<string, string>} normalize(name) → 原词形
|
||||
*/
|
||||
/**
 * Build a "normalized name → original display form" map,
 * used to recover the original spelling of lexicon hits.
 *
 * Sources mirror buildEntityLexicon; the first registration of a
 * normalized name wins.
 *
 * Hard constraint: normalize(context.name1) is always removed.
 *
 * @param {object} store
 * @param {object} context
 * @returns {Map<string, string>} normalize(name) → original display form
 */
export function buildDisplayNameMap(store, context) {
    const map = new Map();

    // Register a display form; the first registration of a name wins.
    const register = (raw) => {
        const n = normalize(raw);
        if (n && n.length >= 2 && !map.has(n)) {
            map.set(n, String(raw).trim());
        }
    };

    // Main characters (string entries or { name } objects; optional
    // chaining guards against null/malformed entries).
    for (const m of (store?.json?.characters?.main || [])) {
        register(typeof m === 'string' ? m : m?.name);
    }

    // Arc subjects.
    for (const a of (store?.json?.arcs || [])) {
        register(a?.name);
    }

    // Current character.
    if (context?.name2) register(context.name2);

    // L3 fact subjects, skipping retracted facts.
    for (const f of (store?.json?.facts || [])) {
        if (!f?.retracted) register(f?.s);
    }

    // ★ Hard constraint: the player persona (name1) never appears.
    if (context?.name1) {
        map.delete(normalize(context.name1));
    }

    return map;
}
|
||||
|
||||
/**
|
||||
* 从文本中提取命中的实体
|
||||
*
|
||||
* 逻辑:遍历词典,检查文本中是否包含(不区分大小写)
|
||||
* 返回命中的实体原词形(去重)
|
||||
*
|
||||
* @param {string} text - 清洗后的文本
|
||||
* @param {Set<string>} lexicon - 标准化后的实体集合
|
||||
* @param {Map<string, string>} displayMap - normalize → 原词形
|
||||
* @returns {string[]} 命中的实体(原词形)
|
||||
*/
|
||||
/**
 * Extract the entities mentioned in a text.
 *
 * Logic: for each lexicon entry, check whether the normalized text
 * contains it (case-insensitive substring match) and return the hits
 * in their original display form.
 *
 * @param {string} text - cleaned text
 * @param {Set<string>} lexicon - set of normalized entity names
 * @param {Map<string, string>} displayMap - normalize(name) → display form
 * @returns {string[]} matched entities (display form), unique by construction
 */
export function extractEntitiesFromText(text, lexicon, displayMap) {
    if (!text || !lexicon?.size) return [];

    const textNorm = normalize(text);
    const hits = [];

    // Set iteration yields each entity exactly once, so no extra
    // dedupe bookkeeping is needed.
    for (const entity of lexicon) {
        if (textNorm.includes(entity)) {
            // Prefer the original display form when one is registered.
            hits.push(displayMap?.get(entity) || entity);
        }
    }

    return hits;
}
|
||||
369
modules/story-summary/vector/retrieval/lexical-index.js
Normal file
369
modules/story-summary/vector/retrieval/lexical-index.js
Normal file
@@ -0,0 +1,369 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// lexical-index.js - MiniSearch 词法检索索引
|
||||
//
|
||||
// 职责:
|
||||
// 1. 对 L0 atoms + L1 chunks + L2 events 建立词法索引
|
||||
// 2. 提供词法检索接口(专名精确匹配兜底)
|
||||
// 3. 惰性构建 + 缓存失效机制
|
||||
//
|
||||
// 索引存储:纯内存(不持久化)
|
||||
// 重建时机:CHAT_CHANGED / L0提取完成 / L2总结完成
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import MiniSearch from '../../../../libs/minisearch.mjs';
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { getSummaryStore } from '../../data/store.js';
|
||||
import { getStateAtoms } from '../storage/state-store.js';
|
||||
import { getAllChunks } from '../storage/chunk-store.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
|
||||
const MODULE_ID = 'lexical-index';
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 缓存
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
let cachedIndex = null;
|
||||
let cachedChatId = null;
|
||||
let cachedFingerprint = null; // atoms.length + chunks.length + events.length 的简单指纹
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 工具函数
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 清理事件摘要(移除楼层标记)
|
||||
* @param {string} summary
|
||||
* @returns {string}
|
||||
*/
|
||||
/**
 * Clean an event summary by stripping the trailing floor marker,
 * e.g. "(#12)" or "(#12-15)".
 * @param {string} summary
 * @returns {string}
 */
function cleanSummary(summary) {
    const text = String(summary || '');
    return text.replace(/\s*\(#\d+(?:-\d+)?\)\s*$/, '').trim();
}
|
||||
|
||||
/**
|
||||
* 计算缓存指纹(用于判断是否需要重建)
|
||||
* @param {number} atomCount
|
||||
* @param {number} chunkCount
|
||||
* @param {number} eventCount
|
||||
* @returns {string}
|
||||
*/
|
||||
/**
 * Compute the cache fingerprint used to decide whether the index
 * needs rebuilding (a simple count triple).
 * @param {number} atomCount
 * @param {number} chunkCount
 * @param {number} eventCount
 * @returns {string} "atoms:chunks:events"
 */
function computeFingerprint(atomCount, chunkCount, eventCount) {
    return [atomCount, chunkCount, eventCount].join(':');
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 索引构建
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 构建 MiniSearch 索引
|
||||
*
|
||||
* 索引三类文档:
|
||||
* - L0 atoms: { id: atomId, type: 'atom', floor, text: semantic }
|
||||
* - L1 chunks: { id: chunkId, type: 'chunk', floor, text: chunk.text }
|
||||
* - L2 events: { id: eventId, type: 'event', floor: null, text: title + participants + summary }
|
||||
*
|
||||
* @param {object[]} atoms - getStateAtoms() 返回值
|
||||
* @param {object[]} chunks - getAllChunks(chatId) 返回值
|
||||
* @param {object[]} events - store.json.events
|
||||
* @returns {MiniSearch}
|
||||
*/
|
||||
/**
 * Build the in-memory MiniSearch index over three document kinds:
 *   - L0 atoms:  { id: atomId,  type: 'atom',  floor, text: semantic }
 *   - L1 chunks: { id: chunkId, type: 'chunk', floor, text: chunk.text }
 *   - L2 events: { id: eventId, type: 'event', floor: null, text: title + participants + summary }
 *
 * @param {object[]} atoms - return value of getStateAtoms()
 * @param {object[]} chunks - return value of getAllChunks(chatId)
 * @param {object[]} events - store.json.events
 * @returns {MiniSearch}
 */
export function buildLexicalIndex(atoms, chunks, events) {
    const startedAt = performance.now();

    const index = new MiniSearch({
        fields: ['text'],
        storeFields: ['type', 'floor'],
        idField: 'id',
        searchOptions: {
            boost: { text: 1 },
            fuzzy: 0.2,
            prefix: true,
        },
        // CJK-friendly tokenizer: character n-grams plus space/punct splitting.
        tokenize: chineseTokenize,
    });

    const docs = [];

    // L0 atoms — skip entries without an id or semantic text.
    for (const atom of (atoms || [])) {
        if (!atom?.atomId || !atom.semantic) continue;
        docs.push({
            id: atom.atomId,
            type: 'atom',
            floor: atom.floor ?? -1,
            text: atom.semantic,
        });
    }

    // L1 chunks — skip entries without an id or text.
    for (const chunk of (chunks || [])) {
        if (!chunk?.chunkId || !chunk.text) continue;
        docs.push({
            id: chunk.chunkId,
            type: 'chunk',
            floor: chunk.floor ?? -1,
            text: chunk.text,
        });
    }

    // L2 events — index title + participants + cleaned summary as one field.
    for (const ev of (events || [])) {
        if (!ev?.id) continue;
        const parts = [];
        if (ev.title) parts.push(ev.title);
        if (ev.participants?.length) parts.push(ev.participants.join(' '));
        const summary = cleanSummary(ev.summary);
        if (summary) parts.push(summary);
        const text = parts.join(' ').trim();
        if (!text) continue;
        docs.push({ id: ev.id, type: 'event', floor: null, text });
    }

    if (docs.length > 0) {
        index.addAll(docs);
    }

    const elapsed = Math.round(performance.now() - startedAt);
    xbLog.info(MODULE_ID, `索引构建完成: ${docs.length} 文档 (atoms=${atoms?.length || 0}, chunks=${chunks?.length || 0}, events=${events?.length || 0}) ${elapsed}ms`);

    return index;
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 中文 Tokenizer
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 中文友好的分词器
|
||||
*
|
||||
* 策略:
|
||||
* 1. 连续中文字符 → 滑动 bigram("黄英梅" → "黄英", "英梅")
|
||||
* 2. 连续非中文字符 → 按空格/标点分割
|
||||
* 3. 保留完整中文词(2-4字)作为额外 token
|
||||
*
|
||||
* @param {string} text
|
||||
* @returns {string[]}
|
||||
*/
|
||||
/**
 * CJK-friendly tokenizer.
 *
 * Strategy:
 *   1. Runs of CJK characters produce sliding bigrams ("黄英梅" → "黄英", "英梅").
 *   2. Runs of 3+ CJK characters additionally produce sliding trigrams.
 *   3. The complete CJK run (when 2-6 chars) is also emitted as a token.
 *   4. Non-CJK runs are split on whitespace and punctuation.
 *
 * @param {string} text
 * @returns {string[]}
 */
function chineseTokenize(text) {
    if (!text) return [];

    const lowered = String(text).toLowerCase();
    const tokens = [];

    // The capturing split alternates between CJK runs and everything else.
    for (const seg of lowered.split(/([\u4e00-\u9fff]+)/g)) {
        if (!seg) continue;

        if (/^[\u4e00-\u9fff]+$/.test(seg)) {
            // Whole run as one token when its length is reasonable.
            if (seg.length >= 2 && seg.length <= 6) {
                tokens.push(seg);
            }
            // Sliding bigrams.
            for (let i = 0; i + 2 <= seg.length; i++) {
                tokens.push(seg.slice(i, i + 2));
            }
            // Sliding trigrams (only emitted for runs of 3+ chars).
            for (let i = 0; i + 3 <= seg.length; i++) {
                tokens.push(seg.slice(i, i + 3));
            }
        } else {
            // Non-CJK run: split on whitespace and common punctuation,
            // keeping fragments of at least 2 characters.
            for (const word of seg.split(/[\s\-_.,;:!?'"()[\]{}<>/\\|@#$%^&*+=~`]+/)) {
                const w = word.trim();
                if (w.length >= 2) {
                    tokens.push(w);
                }
            }
        }
    }

    return tokens;
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 检索
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* @typedef {object} LexicalSearchResult
|
||||
* @property {string[]} atomIds - 命中的 L0 atom IDs
|
||||
* @property {Set<number>} atomFloors - 命中的 L0 楼层集合
|
||||
* @property {string[]} chunkIds - 命中的 L1 chunk IDs
|
||||
* @property {Set<number>} chunkFloors - 命中的 L1 楼层集合
|
||||
* @property {string[]} eventIds - 命中的 L2 event IDs
|
||||
* @property {object[]} chunkScores - chunk 命中详情 [{ chunkId, score }]
|
||||
* @property {number} searchTime - 检索耗时 ms
|
||||
*/
|
||||
|
||||
/**
|
||||
* 在词法索引中检索
|
||||
*
|
||||
* @param {MiniSearch} index - 索引实例
|
||||
* @param {string[]} terms - 查询词列表
|
||||
* @returns {LexicalSearchResult}
|
||||
*/
|
||||
/**
 * Run a lexical query against the index and bucket the hits by doc type.
 *
 * All terms are OR-combined into a single query; per-type results are
 * deduplicated by id while preserving MiniSearch's rank order.
 *
 * @param {MiniSearch} index - index instance (may be null)
 * @param {string[]} terms - query terms
 * @returns {LexicalSearchResult}
 */
export function searchLexicalIndex(index, terms) {
    const startedAt = performance.now();

    const result = {
        atomIds: [],
        atomFloors: new Set(),
        chunkIds: [],
        chunkFloors: new Set(),
        eventIds: [],
        chunkScores: [],
        searchTime: 0,
    };

    // Stamp elapsed time into the result; reused on every exit path.
    const finish = () => {
        result.searchTime = Math.round(performance.now() - startedAt);
        return result;
    };

    if (!index || !terms?.length) return finish();

    let hits;
    try {
        // Single OR query over all terms.
        hits = index.search(terms.join(' '), {
            boost: { text: 1 },
            fuzzy: 0.2,
            prefix: true,
            combineWith: 'OR',
        });
    } catch (e) {
        xbLog.warn(MODULE_ID, '检索失败', e);
        return finish();
    }

    // Dedupe per type while classifying.
    const seenAtoms = new Set();
    const seenChunks = new Set();
    const seenEvents = new Set();

    for (const hit of hits) {
        const { id, type, floor } = hit;
        const hasFloor = typeof floor === 'number' && floor >= 0;

        if (type === 'atom') {
            if (seenAtoms.has(id)) continue;
            seenAtoms.add(id);
            result.atomIds.push(id);
            if (hasFloor) result.atomFloors.add(floor);
        } else if (type === 'chunk') {
            if (seenChunks.has(id)) continue;
            seenChunks.add(id);
            result.chunkIds.push(id);
            result.chunkScores.push({ chunkId: id, score: hit.score });
            if (hasFloor) result.chunkFloors.add(floor);
        } else if (type === 'event') {
            if (seenEvents.has(id)) continue;
            seenEvents.add(id);
            result.eventIds.push(id);
        }
    }

    finish();

    xbLog.info(MODULE_ID,
        `检索完成: terms=[${terms.slice(0, 5).join(',')}] → atoms=${result.atomIds.length} chunks=${result.chunkIds.length} events=${result.eventIds.length} (${result.searchTime}ms)`
    );

    return result;
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 惰性缓存管理
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 获取词法索引(惰性构建 + 缓存)
|
||||
*
|
||||
* 如果缓存有效则直接返回;否则自动构建。
|
||||
* 缓存失效条件:chatId 变化 / 数据指纹变化 / 手动 invalidate
|
||||
*
|
||||
* @returns {Promise<MiniSearch>}
|
||||
*/
|
||||
/**
 * Get the lexical index, building it lazily and caching the result.
 *
 * The cache is invalidated when the chatId changes, when the data
 * fingerprint (atom/chunk/event counts) changes, or when
 * invalidateLexicalIndex() is called explicitly.
 *
 * @returns {Promise<MiniSearch>} the index, or null when no chat is active
 */
export async function getLexicalIndex() {
    const { chatId } = getContext();
    if (!chatId) return null;

    // Gather the current data snapshot.
    const atoms = getStateAtoms() || [];
    const events = getSummaryStore()?.json?.events || [];

    let chunks = [];
    try {
        chunks = await getAllChunks(chatId);
    } catch (e) {
        xbLog.warn(MODULE_ID, '获取 chunks 失败', e);
    }

    const fp = computeFingerprint(atoms.length, chunks.length, events.length);

    // Serve from cache when both the chat and the fingerprint match.
    const cacheValid = cachedIndex && cachedChatId === chatId && cachedFingerprint === fp;
    if (cacheValid) return cachedIndex;

    // Rebuild and refresh the cache.
    xbLog.info(MODULE_ID, `缓存失效,重建索引 (chatId=${chatId.slice(0, 8)}, fp=${fp})`);

    cachedIndex = buildLexicalIndex(atoms, chunks, events);
    cachedChatId = chatId;
    cachedFingerprint = fp;

    return cachedIndex;
}
|
||||
|
||||
/**
|
||||
* 使缓存失效(下次 getLexicalIndex 时自动重建)
|
||||
*
|
||||
* 调用时机:
|
||||
* - CHAT_CHANGED
|
||||
* - L0 提取完成(handleAnchorGenerate 完成后)
|
||||
* - L2 总结完成(onComplete 回调中)
|
||||
*/
|
||||
/**
 * Drop the cached index so the next getLexicalIndex() call rebuilds it.
 *
 * Call on:
 *   - CHAT_CHANGED
 *   - L0 extraction finished (after handleAnchorGenerate)
 *   - L2 summarization finished (in the onComplete callback)
 */
export function invalidateLexicalIndex() {
    // Only log when there was actually something to drop.
    if (cachedIndex) xbLog.info(MODULE_ID, '索引缓存已失效');

    cachedIndex = null;
    cachedChatId = null;
    cachedFingerprint = null;
}
|
||||
@@ -1,5 +1,5 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Story Summary - Metrics Collector (v2 - 统一命名)
|
||||
// Story Summary - Metrics Collector (v3 - Deterministic Query + Hybrid + W-RRF)
|
||||
//
|
||||
// 命名规范:
|
||||
// - 存储层用 L0/L1/L2/L3(StateAtom/Chunk/Event/Fact)
|
||||
@@ -8,21 +8,44 @@
|
||||
|
||||
/**
|
||||
* 创建空的指标对象
|
||||
* @returns {object} 指标对象
|
||||
* @returns {object}
|
||||
*/
|
||||
export function createMetrics() {
|
||||
return {
|
||||
// Query Build - 查询构建
|
||||
query: {
|
||||
buildTime: 0,
|
||||
refineTime: 0,
|
||||
},
|
||||
|
||||
// Anchor (L0 StateAtoms) - 语义锚点
|
||||
anchor: {
|
||||
needRecall: false,
|
||||
focusEntities: [],
|
||||
queries: [],
|
||||
queryExpansionTime: 0,
|
||||
matched: 0,
|
||||
floorsHit: 0,
|
||||
topHits: [],
|
||||
},
|
||||
|
||||
// Lexical (MiniSearch) - 词法检索
|
||||
lexical: {
|
||||
terms: [],
|
||||
atomHits: 0,
|
||||
chunkHits: 0,
|
||||
eventHits: 0,
|
||||
searchTime: 0,
|
||||
},
|
||||
|
||||
// Fusion (W-RRF) - 多路融合
|
||||
fusion: {
|
||||
denseCount: 0,
|
||||
lexCount: 0,
|
||||
anchorCount: 0,
|
||||
totalUnique: 0,
|
||||
afterCap: 0,
|
||||
time: 0,
|
||||
},
|
||||
|
||||
// Constraint (L3 Facts) - 世界约束
|
||||
constraint: {
|
||||
total: 0,
|
||||
@@ -37,7 +60,7 @@ export function createMetrics() {
|
||||
inStore: 0,
|
||||
considered: 0,
|
||||
selected: 0,
|
||||
byRecallType: { direct: 0, related: 0, causal: 0 },
|
||||
byRecallType: { direct: 0, related: 0, causal: 0, lexical: 0 },
|
||||
similarityDistribution: { min: 0, max: 0, mean: 0, median: 0 },
|
||||
entityFilter: null,
|
||||
causalChainDepth: 0,
|
||||
@@ -50,7 +73,7 @@ export function createMetrics() {
|
||||
evidence: {
|
||||
floorsFromAnchors: 0,
|
||||
chunkTotal: 0,
|
||||
chunkAfterCoarse: 0,
|
||||
denseCoarse: 0,
|
||||
merged: 0,
|
||||
mergedByType: { anchorVirtual: 0, chunkReal: 0 },
|
||||
selected: 0,
|
||||
@@ -93,8 +116,11 @@ export function createMetrics() {
|
||||
|
||||
// Timing - 计时
|
||||
timing: {
|
||||
queryExpansion: 0,
|
||||
queryBuild: 0,
|
||||
queryRefine: 0,
|
||||
anchorSearch: 0,
|
||||
lexicalSearch: 0,
|
||||
fusion: 0,
|
||||
constraintFilter: 0,
|
||||
eventRetrieval: 0,
|
||||
evidenceRetrieval: 0,
|
||||
@@ -109,6 +135,7 @@ export function createMetrics() {
|
||||
constraintCoverage: 100,
|
||||
eventPrecisionProxy: 0,
|
||||
evidenceDensity: 0,
|
||||
chunkRealRatio: 0,
|
||||
potentialIssues: [],
|
||||
},
|
||||
};
|
||||
@@ -116,7 +143,7 @@ export function createMetrics() {
|
||||
|
||||
/**
|
||||
* 计算相似度分布统计
|
||||
* @param {number[]} similarities - 相似度数组
|
||||
* @param {number[]} similarities
|
||||
* @returns {{min: number, max: number, mean: number, median: number}}
|
||||
*/
|
||||
export function calcSimilarityStats(similarities) {
|
||||
@@ -137,8 +164,8 @@ export function calcSimilarityStats(similarities) {
|
||||
|
||||
/**
|
||||
* 格式化指标为可读日志
|
||||
* @param {object} metrics - 指标对象
|
||||
* @returns {string} 格式化后的日志
|
||||
* @param {object} metrics
|
||||
* @returns {string}
|
||||
*/
|
||||
export function formatMetricsLog(metrics) {
|
||||
const m = metrics;
|
||||
@@ -150,18 +177,41 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push('════════════════════════════════════════');
|
||||
lines.push('');
|
||||
|
||||
// Query Build
|
||||
lines.push('[Query] 查询构建');
|
||||
lines.push(`├─ build_time: ${m.query.buildTime}ms`);
|
||||
lines.push(`└─ refine_time: ${m.query.refineTime}ms`);
|
||||
lines.push('');
|
||||
|
||||
// Anchor (L0 StateAtoms)
|
||||
lines.push('[Anchor] L0 StateAtoms - 语义锚点');
|
||||
lines.push(`├─ need_recall: ${m.anchor.needRecall}`);
|
||||
if (m.anchor.needRecall) {
|
||||
lines.push(`├─ focus_entities: [${(m.anchor.focusEntities || []).join(', ')}]`);
|
||||
lines.push(`├─ queries: [${(m.anchor.queries || []).slice(0, 3).join(', ')}]`);
|
||||
lines.push(`├─ query_expansion_time: ${m.anchor.queryExpansionTime}ms`);
|
||||
lines.push(`├─ matched: ${m.anchor.matched || 0}`);
|
||||
lines.push(`└─ floors_hit: ${m.anchor.floorsHit || 0}`);
|
||||
}
|
||||
lines.push('');
|
||||
|
||||
// Lexical (MiniSearch)
|
||||
lines.push('[Lexical] MiniSearch - 词法检索');
|
||||
lines.push(`├─ terms: [${(m.lexical.terms || []).slice(0, 8).join(', ')}]`);
|
||||
lines.push(`├─ atom_hits: ${m.lexical.atomHits}`);
|
||||
lines.push(`├─ chunk_hits: ${m.lexical.chunkHits}`);
|
||||
lines.push(`├─ event_hits: ${m.lexical.eventHits}`);
|
||||
lines.push(`└─ search_time: ${m.lexical.searchTime}ms`);
|
||||
lines.push('');
|
||||
|
||||
// Fusion (W-RRF)
|
||||
lines.push('[Fusion] W-RRF - 多路融合');
|
||||
lines.push(`├─ dense_count: ${m.fusion.denseCount}`);
|
||||
lines.push(`├─ lex_count: ${m.fusion.lexCount}`);
|
||||
lines.push(`├─ anchor_count: ${m.fusion.anchorCount}`);
|
||||
lines.push(`├─ total_unique: ${m.fusion.totalUnique}`);
|
||||
lines.push(`├─ after_cap: ${m.fusion.afterCap}`);
|
||||
lines.push(`└─ time: ${m.fusion.time}ms`);
|
||||
lines.push('');
|
||||
|
||||
// Constraint (L3 Facts)
|
||||
lines.push('[Constraint] L3 Facts - 世界约束');
|
||||
lines.push(`├─ total: ${m.constraint.total}`);
|
||||
@@ -191,7 +241,8 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push(`├─ by_recall_type:`);
|
||||
lines.push(`│ ├─ direct: ${m.event.byRecallType.direct}`);
|
||||
lines.push(`│ ├─ related: ${m.event.byRecallType.related}`);
|
||||
lines.push(`│ └─ causal: ${m.event.byRecallType.causal}`);
|
||||
lines.push(`│ ├─ causal: ${m.event.byRecallType.causal}`);
|
||||
lines.push(`│ └─ lexical: ${m.event.byRecallType.lexical}`);
|
||||
|
||||
const sim = m.event.similarityDistribution;
|
||||
if (sim && sim.max > 0) {
|
||||
@@ -210,12 +261,9 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push('[Evidence] L1 Chunks - 原文证据');
|
||||
lines.push(`├─ floors_from_anchors: ${m.evidence.floorsFromAnchors}`);
|
||||
|
||||
// 粗筛信息
|
||||
if (m.evidence.chunkTotal > 0) {
|
||||
lines.push(`├─ coarse_filter:`);
|
||||
lines.push(`│ ├─ total: ${m.evidence.chunkTotal}`);
|
||||
lines.push(`│ ├─ after: ${m.evidence.chunkAfterCoarse}`);
|
||||
lines.push(`│ └─ filtered: ${m.evidence.chunkTotal - m.evidence.chunkAfterCoarse}`);
|
||||
lines.push(`├─ chunk_total: ${m.evidence.chunkTotal}`);
|
||||
lines.push(`├─ dense_coarse: ${m.evidence.denseCoarse}`);
|
||||
}
|
||||
|
||||
lines.push(`├─ merged: ${m.evidence.merged}`);
|
||||
@@ -225,7 +273,6 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push(`│ └─ chunk_real: ${mt.chunkReal || 0}`);
|
||||
}
|
||||
|
||||
// Rerank 信息
|
||||
if (m.evidence.rerankApplied) {
|
||||
lines.push(`├─ rerank_applied: true`);
|
||||
lines.push(`│ ├─ before: ${m.evidence.beforeRerank}`);
|
||||
@@ -281,8 +328,11 @@ export function formatMetricsLog(metrics) {
|
||||
|
||||
// Timing
|
||||
lines.push('[Timing] 计时');
|
||||
lines.push(`├─ query_expansion: ${m.timing.queryExpansion}ms`);
|
||||
lines.push(`├─ query_build: ${m.query.buildTime}ms`);
|
||||
lines.push(`├─ query_refine: ${m.query.refineTime}ms`);
|
||||
lines.push(`├─ anchor_search: ${m.timing.anchorSearch}ms`);
|
||||
lines.push(`├─ lexical_search: ${m.lexical.searchTime}ms`);
|
||||
lines.push(`├─ fusion: ${m.fusion.time}ms`);
|
||||
lines.push(`├─ constraint_filter: ${m.timing.constraintFilter}ms`);
|
||||
lines.push(`├─ event_retrieval: ${m.timing.eventRetrieval}ms`);
|
||||
lines.push(`├─ evidence_retrieval: ${m.timing.evidenceRetrieval}ms`);
|
||||
@@ -299,6 +349,7 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`);
|
||||
lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`);
|
||||
lines.push(`├─ evidence_density: ${m.quality.evidenceDensity}%`);
|
||||
lines.push(`├─ chunk_real_ratio: ${m.quality.chunkRealRatio}%`);
|
||||
|
||||
if (m.quality.potentialIssues && m.quality.potentialIssues.length > 0) {
|
||||
lines.push(`└─ potential_issues:`);
|
||||
@@ -319,14 +370,53 @@ export function formatMetricsLog(metrics) {
|
||||
|
||||
/**
|
||||
* 检测潜在问题
|
||||
* @param {object} metrics - 指标对象
|
||||
* @returns {string[]} 问题列表
|
||||
* @param {object} metrics
|
||||
* @returns {string[]}
|
||||
*/
|
||||
export function detectIssues(metrics) {
|
||||
const issues = [];
|
||||
const m = metrics;
|
||||
|
||||
// 事件召回比例问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 查询构建问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if ((m.anchor.focusEntities || []).length === 0) {
|
||||
issues.push('No focus entities extracted - entity lexicon may be empty or messages too short');
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 锚点匹配问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if ((m.anchor.matched || 0) === 0 && m.anchor.needRecall) {
|
||||
issues.push('No anchors matched - may need to generate anchors');
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 词法检索问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if ((m.lexical.terms || []).length > 0 && m.lexical.atomHits === 0 && m.lexical.chunkHits === 0 && m.lexical.eventHits === 0) {
|
||||
issues.push('Lexical search returned zero hits - terms may not match any indexed content');
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 融合问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if (m.fusion.lexCount === 0 && m.fusion.denseCount > 0) {
|
||||
issues.push('No lexical candidates in fusion - hybrid retrieval not contributing');
|
||||
}
|
||||
|
||||
if (m.fusion.afterCap === 0) {
|
||||
issues.push('Fusion produced zero candidates - all retrieval paths may have failed');
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 事件召回问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if (m.event.considered > 0) {
|
||||
const selectRatio = m.event.selected / m.event.considered;
|
||||
if (selectRatio < 0.1) {
|
||||
@@ -341,7 +431,7 @@ export function detectIssues(metrics) {
|
||||
if (m.event.entityFilter) {
|
||||
const ef = m.event.entityFilter;
|
||||
if (ef.filtered === 0 && ef.before > 10) {
|
||||
issues.push(`No events filtered by entity - focus entities may be too broad or missing`);
|
||||
issues.push('No events filtered by entity - focus entities may be too broad or missing');
|
||||
}
|
||||
if (ef.before > 0 && ef.filtered > ef.before * 0.8) {
|
||||
issues.push(`Too many events filtered (${ef.filtered}/${ef.before}) - focus may be too narrow`);
|
||||
@@ -355,19 +445,18 @@ export function detectIssues(metrics) {
|
||||
|
||||
// 因果链问题
|
||||
if (m.event.selected > 0 && m.event.causalCount === 0 && m.event.byRecallType.direct === 0) {
|
||||
issues.push('No direct or causal events - query expansion may be inaccurate');
|
||||
issues.push('No direct or causal events - query may not align with stored events');
|
||||
}
|
||||
|
||||
// 锚点匹配问题
|
||||
if ((m.anchor.matched || 0) === 0) {
|
||||
issues.push('No anchors matched - may need to generate anchors');
|
||||
}
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 证据问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
// 证据粗筛问题
|
||||
if (m.evidence.chunkTotal > 0 && m.evidence.chunkAfterCoarse > 0) {
|
||||
const coarseFilterRatio = 1 - (m.evidence.chunkAfterCoarse / m.evidence.chunkTotal);
|
||||
if (coarseFilterRatio > 0.9) {
|
||||
issues.push(`Very high evidence coarse filter ratio (${(coarseFilterRatio * 100).toFixed(0)}%) - query may be too specific`);
|
||||
// Dense 粗筛比例
|
||||
if (m.evidence.chunkTotal > 0 && m.evidence.denseCoarse > 0) {
|
||||
const coarseFilterRatio = 1 - (m.evidence.denseCoarse / m.evidence.chunkTotal);
|
||||
if (coarseFilterRatio > 0.95) {
|
||||
issues.push(`Very high dense coarse filter ratio (${(coarseFilterRatio * 100).toFixed(0)}%) - query vector may be poorly aligned`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -376,7 +465,7 @@ export function detectIssues(metrics) {
|
||||
if (m.evidence.beforeRerank > 0 && m.evidence.afterRerank > 0) {
|
||||
const filterRatio = 1 - (m.evidence.afterRerank / m.evidence.beforeRerank);
|
||||
if (filterRatio > 0.7) {
|
||||
issues.push(`High rerank filter ratio (${(filterRatio * 100).toFixed(0)}%) - many irrelevant chunks removed`);
|
||||
issues.push(`High rerank filter ratio (${(filterRatio * 100).toFixed(0)}%) - many irrelevant chunks in fusion output`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -395,24 +484,36 @@ export function detectIssues(metrics) {
|
||||
}
|
||||
}
|
||||
|
||||
// 证据密度问题
|
||||
// chunk_real 比例(核心质量指标)
|
||||
if (m.evidence.selected > 0 && m.evidence.selectedByType) {
|
||||
const chunkReal = m.evidence.selectedByType.chunkReal || 0;
|
||||
const density = chunkReal / m.evidence.selected;
|
||||
if (density < 0.3 && m.evidence.selected > 10) {
|
||||
issues.push(`Low real chunk ratio in selected (${(density * 100).toFixed(0)}%) - may lack concrete evidence`);
|
||||
const ratio = chunkReal / m.evidence.selected;
|
||||
if (ratio === 0 && m.evidence.selected > 5) {
|
||||
issues.push('Zero real chunks in selected evidence - only anchor virtual chunks present');
|
||||
} else if (ratio < 0.2 && m.evidence.selected > 10) {
|
||||
issues.push(`Low real chunk ratio (${(ratio * 100).toFixed(0)}%) - may lack concrete dialogue evidence`);
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 预算问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if (m.budget.utilization > 90) {
|
||||
issues.push(`High budget utilization (${m.budget.utilization}%) - may be truncating content`);
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 性能问题
|
||||
if (m.timing.total > 5000) {
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if (m.timing.total > 8000) {
|
||||
issues.push(`Slow recall (${m.timing.total}ms) - consider optimization`);
|
||||
}
|
||||
|
||||
if (m.query.buildTime > 100) {
|
||||
issues.push(`Slow query build (${m.query.buildTime}ms) - entity lexicon may be too large`);
|
||||
}
|
||||
|
||||
return issues;
|
||||
}
|
||||
|
||||
341
modules/story-summary/vector/retrieval/query-builder.js
Normal file
341
modules/story-summary/vector/retrieval/query-builder.js
Normal file
@@ -0,0 +1,341 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// query-builder.js - 确定性查询构建器(无 LLM)
|
||||
//
|
||||
// 职责:
|
||||
// 1. 从最近消息 + 实体词典构建 QueryBundle_v0
|
||||
// 2. 用第一轮召回结果增强为 QueryBundle_v1
|
||||
//
|
||||
// 不负责:向量化、检索、rerank
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { buildEntityLexicon, buildDisplayNameMap, extractEntitiesFromText } from './entity-lexicon.js';
|
||||
import { getSummaryStore } from '../../data/store.js';
|
||||
import { filterText } from '../utils/text-filter.js';
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 常量
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
const DIALOGUE_MAX_CHARS = 400;
|
||||
const PENDING_MAX_CHARS = 400;
|
||||
const MEMORY_HINT_MAX_CHARS = 100;
|
||||
const MEMORY_HINT_ATOMS_MAX = 5;
|
||||
const MEMORY_HINT_EVENTS_MAX = 3;
|
||||
const RERANK_QUERY_MAX_CHARS = 500;
|
||||
const RERANK_SNIPPET_CHARS = 150;
|
||||
const LEXICAL_TERMS_MAX = 10;
|
||||
const LEXICAL_TERM_MIN_LEN = 2;
|
||||
const LEXICAL_TERM_MAX_LEN = 6;
|
||||
|
||||
// 中文停用词(高频无意义词)
|
||||
const STOP_WORDS = new Set([
|
||||
'的', '了', '在', '是', '我', '有', '和', '就', '不', '人',
|
||||
'都', '一', '一个', '上', '也', '很', '到', '说', '要', '去',
|
||||
'你', '会', '着', '没有', '看', '好', '自己', '这', '他', '她',
|
||||
'它', '吗', '什么', '那', '里', '来', '吧', '呢', '啊', '哦',
|
||||
'嗯', '呀', '哈', '嘿', '喂', '哎', '唉', '哇', '呃', '嘛',
|
||||
'把', '被', '让', '给', '从', '向', '对', '跟', '比', '但',
|
||||
'而', '或', '如果', '因为', '所以', '虽然', '但是', '然后',
|
||||
'可以', '这样', '那样', '怎么', '为什么', '什么样', '哪里',
|
||||
'时候', '现在', '已经', '还是', '只是', '可能', '应该', '知道',
|
||||
'觉得', '开始', '一下', '一些', '这个', '那个', '他们', '我们',
|
||||
'你们', '自己', '起来', '出来', '进去', '回来', '过来', '下去',
|
||||
]);
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 工具函数
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Normalize a raw chat message for retrieval (kept in sync with the
 * preprocessing in chunk-builder / recall).
 *
 * Runs the shared filterText() pass, then strips [tts:...] tags and
 * <state>...</state> blocks, and trims surrounding whitespace.
 *
 * @param {string} text
 * @returns {string}
 */
function cleanMessageText(text) {
  const filtered = filterText(text);
  const withoutTts = filtered.replace(/\[tts:[^\]]*\]/gi, '');
  const withoutState = withoutTts.replace(/<state>[\s\S]*?<\/state>/gi, '');
  return withoutState.trim();
}
|
||||
|
||||
/**
 * Clip text to at most maxLen characters, appending an ellipsis when
 * anything was cut off. Falsy input yields the empty string.
 *
 * @param {string} text
 * @param {number} maxLen
 * @returns {string}
 */
function truncate(text, maxLen) {
  const value = text || '';
  if (value.length <= maxLen) {
    return value;
  }
  return `${value.slice(0, maxLen)}…`;
}
|
||||
|
||||
/**
 * Strip a trailing floor marker like "(#12)" or "(#12-15)" from an event
 * summary and trim surrounding whitespace. Non-string input is coerced.
 *
 * @param {string} summary
 * @returns {string}
 */
function cleanSummary(summary) {
  const text = String(summary || '');
  const withoutMarker = text.replace(/\s*\(#\d+(?:-\d+)?\)\s*$/, '');
  return withoutMarker.trim();
}
|
||||
|
||||
/**
 * Extract high-frequency content terms from text for lexical retrieval.
 *
 * Strategy: segment into runs of 2-6 CJK characters and Latin words of
 * 3+ letters, lowercase each segment, drop stop words and out-of-range
 * lengths, then return the most frequent survivors.
 *
 * @param {string} text - Pre-cleaned text.
 * @param {number} maxTerms - Upper bound on the number of returned terms.
 * @returns {string[]}
 */
function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
  if (!text) return [];

  // CJK runs of 2-6 chars, or Latin words of 3+ letters.
  const segments = text.match(/[\u4e00-\u9fff]{2,6}|[a-zA-Z]{3,}/g) || [];

  const counts = new Map();
  for (const segment of segments) {
    const term = segment.toLowerCase();
    const inRange =
      term.length >= LEXICAL_TERM_MIN_LEN && term.length <= LEXICAL_TERM_MAX_LEN;
    if (!inRange || STOP_WORDS.has(term)) continue;
    counts.set(term, (counts.get(term) || 0) + 1);
  }

  // Most frequent first (stable sort keeps first-seen order on ties).
  return [...counts.entries()]
    .sort((left, right) => right[1] - left[1])
    .slice(0, maxTerms)
    .map(([term]) => term);
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// QueryBundle 类型定义(JSDoc)
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* @typedef {object} QueryBundle
|
||||
* @property {string[]} focusEntities - 焦点实体(原词形,已排除 name1)
|
||||
* @property {string} queryText_v0 - 第一轮查询文本
|
||||
* @property {string|null} queryText_v1 - 第二轮查询文本(refinement 后填充)
|
||||
* @property {string} rerankQuery - rerank 用的短查询
|
||||
* @property {string[]} lexicalTerms - MiniSearch 查询词
|
||||
* @property {Set<string>} _lexicon - 实体词典(内部使用)
|
||||
* @property {Map<string, string>} _displayMap - 标准化→原词形映射(内部使用)
|
||||
*/
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 阶段 1:构建 QueryBundle_v0
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Build the initial query bundle (stage 1 — deterministic, no LLM).
 *
 * @param {object[]} lastMessages - The most recent K=2 chat messages.
 * @param {string|null} pendingUserMessage - User input not yet committed to chat.
 * @param {object|null} store - getSummaryStore() result (auto-fetched when null).
 * @param {object|null} context - { name1, name2 } (auto-fetched when null).
 * @returns {QueryBundle}
 */
export function buildQueryBundle(lastMessages, pendingUserMessage, store = null, context = null) {
  // Resolve defaults lazily so callers may omit store/context.
  const resolvedStore = store || getSummaryStore();
  let resolvedContext = context;
  if (!resolvedContext) {
    const ctx = getContext();
    resolvedContext = { name1: ctx.name1, name2: ctx.name2 };
  }

  // Step 1: trusted entity lexicon + normalized→display-name map.
  const lexicon = buildEntityLexicon(resolvedStore, resolvedContext);
  const displayMap = buildDisplayNameMap(resolvedStore, resolvedContext);

  // Step 2: clean the recent messages into labeled dialogue lines.
  const messages = lastMessages || [];
  const dialogueLines = [];
  const allCleanText = [];
  for (const msg of messages) {
    const speaker = msg.is_user
      ? (resolvedContext.name1 || '用户')
      : (msg.name || resolvedContext.name2 || '角色');
    const clean = cleanMessageText(msg.mes || '');
    if (!clean) continue;
    // Deliberately no floor numbers — the embedding model does not need them.
    dialogueLines.push(`${speaker}: ${truncate(clean, DIALOGUE_MAX_CHARS)}`);
    allCleanText.push(clean);
  }

  // Step 3: pending user message (typed but not yet in chat).
  const pendingClean = pendingUserMessage ? cleanMessageText(pendingUserMessage) : '';
  if (pendingClean) {
    allCleanText.push(pendingClean);
  }

  // Step 4: focus entities mentioned in the combined cleaned text.
  const combinedText = allCleanText.join(' ');
  const focusEntities = extractEntitiesFromText(combinedText, lexicon, displayMap);

  // Step 5: long-form first-round query text with labeled sections.
  const queryParts = [];
  if (focusEntities.length > 0) {
    queryParts.push(`[ENTITIES]\n${focusEntities.join('\n')}`);
  }
  if (dialogueLines.length > 0) {
    queryParts.push(`[DIALOGUE]\n${dialogueLines.join('\n')}`);
  }
  if (pendingClean) {
    queryParts.push(`[PENDING_USER]\n${truncate(pendingClean, PENDING_MAX_CHARS)}`);
  }
  const queryText_v0 = queryParts.join('\n\n');

  // Step 6: short rerank query — entities, then per-message snippets.
  const rerankParts = [];
  if (focusEntities.length > 0) {
    rerankParts.push(focusEntities.join(' '));
  }
  for (const msg of messages) {
    const snippet = cleanMessageText(msg.mes || '');
    if (snippet) {
      rerankParts.push(truncate(snippet, RERANK_SNIPPET_CHARS));
    }
  }
  if (pendingClean) {
    rerankParts.push(truncate(pendingClean, RERANK_SNIPPET_CHARS));
  }
  const rerankQuery = truncate(rerankParts.join('\n'), RERANK_QUERY_MAX_CHARS);

  // Step 7: lexical terms — entities first, then frequent text terms, capped.
  const termSet = new Set(focusEntities.map((e) => e.toLowerCase()));
  for (const term of extractKeyTerms(combinedText)) {
    if (termSet.size >= LEXICAL_TERMS_MAX) break;
    termSet.add(term);
  }

  return {
    focusEntities,
    queryText_v0,
    queryText_v1: null,
    rerankQuery,
    lexicalTerms: [...termSet],
    _lexicon: lexicon,
    _displayMap: displayMap,
  };
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// 阶段 3:Query Refinement(用第一轮召回结果增强)
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Refine a query bundle with first-round recall results (stage 3).
 *
 * Mutates `bundle` in place:
 * - queryText_v1 = queryText_v0 + a [MEMORY_HINTS] section (when hints exist)
 * - focusEntities may grow with lexicon-confirmed subjects/objects of top atoms
 * - rerankQuery and lexicalTerms gain keywords mined from the hints
 *
 * @param {QueryBundle} bundle - Bundle to refine (modified in place).
 * @param {object[]} anchorHits - First-round L0 hits, similarity-descending.
 * @param {object[]} eventHits - First-round L2 hits, similarity-descending.
 */
export function refineQueryBundle(bundle, anchorHits, eventHits) {
  const hints = [];

  // 1. Memory hints from the top atom semantics.
  const topAnchors = (anchorHits || []).slice(0, MEMORY_HINT_ATOMS_MAX);
  for (const hit of topAnchors) {
    const semantic = hit.atom?.semantic || '';
    if (semantic) {
      hints.push(truncate(semantic, MEMORY_HINT_MAX_CHARS));
    }
  }

  // 2. Memory hints from the top event titles/summaries.
  for (const hit of (eventHits || []).slice(0, MEMORY_HINT_EVENTS_MAX)) {
    const ev = hit.event || {};
    const title = String(ev.title || '').trim();
    const summary = cleanSummary(ev.summary);
    const line = title && summary ? `${title}: ${summary}` : title || summary;
    if (line) {
      hints.push(truncate(line, MEMORY_HINT_MAX_CHARS));
    }
  }

  // 3. Second-round query text (falls back to v0 when nothing was found).
  bundle.queryText_v1 = hints.length > 0
    ? bundle.queryText_v0 + `\n\n[MEMORY_HINTS]\n${hints.join('\n')}`
    : bundle.queryText_v0;

  // 4. Promote lexicon-confirmed atom subjects/objects into focusEntities.
  const { _lexicon: lexicon, _displayMap: displayMap } = bundle;
  if (lexicon && topAnchors.length > 0) {
    const seen = new Set(bundle.focusEntities.map((e) => e.toLowerCase()));
    for (const hit of topAnchors) {
      const atom = hit.atom;
      if (!atom) continue;
      for (const field of [atom.subject, atom.object]) {
        if (!field) continue;
        // Same normalization as the lexicon builder (NFKC + strip
        // zero-width chars + trim + lowercase).
        const norm = String(field)
          .normalize('NFKC')
          .replace(/[\u200B-\u200D\uFEFF]/g, '')
          .trim()
          .toLowerCase();
        if (norm.length < 2 || !lexicon.has(norm) || seen.has(norm)) continue;
        seen.add(norm);
        bundle.focusEntities.push(displayMap?.get(norm) || field);
      }
    }
  }

  // 5+6. Mine keywords from the hints once and feed both the rerank query
  // and the lexical term list (extractKeyTerms is pure, so the original
  // two identical calls always produced the same result).
  if (hints.length > 0) {
    const hintKeywords = extractKeyTerms(hints.join(' '), 5);
    if (hintKeywords.length > 0) {
      bundle.rerankQuery = truncate(
        `${bundle.rerankQuery}\n${hintKeywords.join(' ')}`,
        RERANK_QUERY_MAX_CHARS
      );
    }
    const termSet = new Set(bundle.lexicalTerms);
    for (const term of hintKeywords) {
      if (termSet.size >= LEXICAL_TERMS_MAX) break;
      if (!termSet.has(term)) {
        termSet.add(term);
        bundle.lexicalTerms.push(term);
      }
    }
  }
}
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user