chore: update retrieval components

fix: qwen thinking toggle and recall log styles
2026-02-08 18:14:02 +08:00 · 2026-02-08 18:12:55 +08:00
7 changed files with 263 additions and 553 deletions
--- a/modules/story-summary/generate/prompt.js
+++ b/modules/story-summary/generate/prompt.js
@@ -1,5 +1,5 @@
 // ═══════════════════════════════════════════════════════════════════════════
-// Story Summary - Prompt Injection (v2 - DSL 版)
+// Story Summary - Prompt Injection (v3 - DSL 版 + Orphan 分组修复)
 // - 仅负责"构建注入文本"，不负责写入 extension_prompts
 // - 注入发生在 story-summary.js：GENERATION_STARTED 时写入 extension_prompts
 // ═══════════════════════════════════════════════════════════════════════════
@@ -23,10 +23,6 @@ const MODULE_ID = "summaryPrompt";
 let lastRecallFailAt = 0;
 const RECALL_FAIL_COOLDOWN_MS = 10_000;

-/**
- * 检查是否可以通知召回失败
- * @returns {boolean}
- */
 function canNotifyRecallFail() {
    const now = Date.now();
    if (now - lastRecallFailAt < RECALL_FAIL_COOLDOWN_MS) return false;
@@ -50,11 +46,6 @@ const TOP_N_STAR = 5;
 // 工具函数
 // ─────────────────────────────────────────────────────────────────────────────

-/**
- * 估算 token 数量
- * @param {string} text - 文本
- * @returns {number} token 数
- */
 function estimateTokens(text) {
    if (!text) return 0;
    const s = String(text);
@@ -62,13 +53,6 @@ function estimateTokens(text) {
    return Math.ceil(zh + (s.length - zh) / 4);
 }

-/**
- * 带预算控制的行推入
- * @param {Array} lines - 行数组
- * @param {string} text - 文本
- * @param {object} state - 预算状态 {used, max}
- * @returns {boolean} 是否成功
- */
 function pushWithBudget(lines, text, state) {
    const t = estimateTokens(text);
    if (state.used + t > state.max) return false;
@@ -77,12 +61,6 @@ function pushWithBudget(lines, text, state) {
    return true;
 }

-/**
- * 计算余弦相似度
- * @param {Array} a - 向量 a
- * @param {Array} b - 向量 b
- * @returns {number} 相似度
- */
 function cosineSimilarity(a, b) {
    if (!a?.length || !b?.length || a.length !== b.length) return 0;
    let dot = 0, nA = 0, nB = 0;
@@ -94,11 +72,6 @@ function cosineSimilarity(a, b) {
    return nA && nB ? dot / (Math.sqrt(nA) * Math.sqrt(nB)) : 0;
 }

-/**
- * 解析楼层范围
- * @param {string} summary - 摘要文本
- * @returns {object|null} {start, end}
- */
 function parseFloorRange(summary) {
    if (!summary) return null;
    const match = String(summary).match(/\(#(\d+)(?:-(\d+))?\)/);
@@ -108,22 +81,12 @@ function parseFloorRange(summary) {
    return { start, end };
 }

-/**
- * 清理摘要中的楼层标记
- * @param {string} summary - 摘要文本
- * @returns {string} 清理后的文本
- */
 function cleanSummary(summary) {
    return String(summary || "")
        .replace(/\s*\(#\d+(?:-\d+)?\)\s*$/, "")
        .trim();
 }

-/**
- * 规范化字符串（用于比较）
- * @param {string} s - 字符串
- * @returns {string} 规范化后的字符串
- */
 function normalize(s) {
    return String(s || '')
        .normalize('NFKC')
@@ -136,22 +99,11 @@ function normalize(s) {
 // 上下文配对工具函数
 // ─────────────────────────────────────────────────────────────────────────────

-/**
- * 获取上下文楼层
- * @param {object} chunk - chunk 对象
- * @returns {number} 配对楼层，-1 表示无效
- */
 function getContextFloor(chunk) {
    if (chunk.isL0) return -1;
    return chunk.isUser ? chunk.floor + 1 : chunk.floor - 1;
 }

-/**
- * 选择配对 chunk
- * @param {Array} candidates - 候选 chunks
- * @param {object} mainChunk - 主 chunk
- * @returns {object|null} 配对 chunk
- */
 function pickContextChunk(candidates, mainChunk) {
    if (!candidates?.length) return null;
    const targetIsUser = !mainChunk.isUser;
@@ -160,12 +112,6 @@ function pickContextChunk(candidates, mainChunk) {
    return candidates[0];
 }

-/**
- * 格式化上下文 chunk 行
- * @param {object} chunk - chunk 对象
- * @param {boolean} isAbove - 是否在主 chunk 上方
- * @returns {string} 格式化的行
- */
 function formatContextChunkLine(chunk, isAbove) {
    const { name1, name2 } = getContext();
    const speaker = chunk.isUser ? (name1 || "用户") : (chunk.speaker || name2 || "角色");
@@ -178,10 +124,6 @@ function formatContextChunkLine(chunk, isAbove) {
 // 系统前导与后缀
 // ─────────────────────────────────────────────────────────────────────────────

-/**
- * 构建系统前导
- * @returns {string}
- */
 function buildSystemPreamble() {
    return [
        "以上是还留在眼前的对话",
@@ -193,10 +135,6 @@ function buildSystemPreamble() {
    ].join("\n");
 }

-/**
- * 构建后缀
- * @returns {string}
- */
 function buildPostscript() {
    return [
        "",
@@ -208,28 +146,20 @@ function buildPostscript() {
 // L1 Facts 分层过滤
 // ─────────────────────────────────────────────────────────────────────────────

-/**
- * 从 store 获取所有已知角色名
- * @param {object} store - summary store
- * @returns {Set<string>} 角色名集合（规范化后）
- */
 function getKnownCharacters(store) {
    const names = new Set();

-    // 从 arcs 获取
    const arcs = store?.json?.arcs || [];
    for (const a of arcs) {
        if (a.name) names.add(normalize(a.name));
    }

-    // 从 characters.main 获取
    const main = store?.json?.characters?.main || [];
    for (const m of main) {
        const name = typeof m === 'string' ? m : m.name;
        if (name) names.add(normalize(name));
    }

-    // 从当前角色获取
    const { name1, name2 } = getContext();
    if (name1) names.add(normalize(name1));
    if (name2) names.add(normalize(name2));
@@ -237,77 +167,42 @@ function getKnownCharacters(store) {
    return names;
 }

-/**
- * 解析关系类 fact 的目标人物
- * @param {string} predicate - 谓词，如 "对蓝袖的看法"
- * @returns {string|null} 目标人物名
- */
 function parseRelationTarget(predicate) {
    const match = String(predicate || '').match(/^对(.+)的/);
    return match ? match[1] : null;
 }

-/**
- * 过滤 facts（分层策略）
- *
- * 规则：
- * - isState=true：全量保留
- * - 关系类（谓词匹配 /^对.+的/）：from 或 to 在 focus 中
- * - 人物状态类（主体是已知角色名）：主体在 focus 中
- * - 其他（物品/地点/规则）：全量保留
- *
- * @param {Array} facts - 所有 facts
- * @param {Array} focusEntities - 焦点实体
- * @param {Set} knownCharacters - 已知角色名集合
- * @returns {Array} 过滤后的 facts
- */
 function filterFactsByRelevance(facts, focusEntities, knownCharacters) {
    if (!facts?.length) return [];

    const focusSet = new Set((focusEntities || []).map(normalize));

    return facts.filter(f => {
-        // 1. isState=true：全量保留
        if (f._isState === true) return true;

-        // 2. 关系类：from 或 to 在 focus 中
        if (isRelationFact(f)) {
            const from = normalize(f.s);
            const target = parseRelationTarget(f.p);
            const to = target ? normalize(target) : '';

-            // 任一方在 focus 中即保留
            if (focusSet.has(from) || focusSet.has(to)) return true;
-
-            // 都不在 focus 中则过滤
            return false;
        }

-        // 3. 主体是已知角色名：检查是否在 focus 中
        const subjectNorm = normalize(f.s);
        if (knownCharacters.has(subjectNorm)) {
            return focusSet.has(subjectNorm);
        }

-        // 4. 主体不是人名（物品/地点/规则等）：保留
        return true;
    });
 }

-/**
- * 格式化 facts 用于注入
- * @param {Array} facts - facts 数组
- * @param {Array} focusEntities - 焦点实体
- * @param {Set} knownCharacters - 已知角色名集合
- * @returns {Array} 格式化后的行
- */
 function formatFactsForInjection(facts, focusEntities, knownCharacters) {
-    // 先过滤
    const filtered = filterFactsByRelevance(facts, focusEntities, knownCharacters);

    if (!filtered.length) return [];

-    // 按 since 降序排序（最新的优先）
    return filtered
        .sort((a, b) => (b.since || 0) - (a.since || 0))
        .map(f => {
@@ -323,11 +218,6 @@ function formatFactsForInjection(facts, focusEntities, knownCharacters) {
 // 格式化函数
 // ─────────────────────────────────────────────────────────────────────────────

-/**
- * 格式化角色弧光行
- * @param {object} a - 弧光对象
- * @returns {string}
- */
 function formatArcLine(a) {
    const moments = (a.moments || [])
        .map(m => (typeof m === "string" ? m : m.text))
@@ -339,11 +229,6 @@ function formatArcLine(a) {
    return `- ${a.name}：${a.trajectory}`;
 }

-/**
- * 格式化 chunk 完整行
- * @param {object} c - chunk 对象
- * @returns {string}
- */
 function formatChunkFullLine(c) {
    const { name1, name2 } = getContext();

@@ -355,38 +240,6 @@ function formatChunkFullLine(c) {
    return `› #${c.floor + 1} [${speaker}] ${String(c.text || "").trim()}`;
 }

-/**
- * 格式化带上下文的 chunk
- * @param {object} mainChunk - 主 chunk
- * @param {object|null} contextChunk - 上下文 chunk
- * @returns {Array} 格式化的行数组
- */
-function formatChunkWithContext(mainChunk, contextChunk) {
-    const lines = [];
-    const mainLine = formatChunkFullLine(mainChunk);
-
-    if (!contextChunk) {
-        lines.push(mainLine);
-        return lines;
-    }
-
-    if (contextChunk.floor < mainChunk.floor) {
-        lines.push(formatContextChunkLine(contextChunk, true));
-        lines.push(mainLine);
-    } else {
-        lines.push(mainLine);
-        lines.push(formatContextChunkLine(contextChunk, false));
-    }
-
-    return lines;
-}
-
-/**
- * 格式化因果事件行
- * @param {object} causalItem - 因果项
- * @param {Map} causalById - 因果映射
- * @returns {string}
- */
 function formatCausalEventLine(causalItem, causalById) {
    const ev = causalItem?.event || {};
    const depth = Math.max(1, Math.min(9, causalItem?._causalDepth || 1));
@@ -415,22 +268,11 @@ function formatCausalEventLine(causalItem, causalById) {
    return lines.join("\n");
 }

-/**
- * 重新编号事件文本
- * @param {string} text - 事件文本
- * @param {number} newIndex - 新编号
- * @returns {string}
- */
 function renumberEventText(text, newIndex) {
    const s = String(text || "");
    return s.replace(/^(\s*)\d+(\.\s*(?:【)?)/, `$1${newIndex}$2`);
 }

-/**
- * 获取事件排序键
- * @param {object} ev - 事件对象
- * @returns {number}
- */
 function getEventSortKey(ev) {
    const r = parseFloorRange(ev?.summary);
    if (r) return r.start;
@@ -438,20 +280,98 @@ function getEventSortKey(ev) {
    return m ? parseInt(m[1], 10) : Number.MAX_SAFE_INTEGER;
 }

+// ─────────────────────────────────────────────────────────────────────────────
+// Assemble orphan chunks grouped by floor, within a token budget (fixes duplicated context lines)
+// ─────────────────────────────────────────────────────────────────────────────
+
+function assembleOrphansByFloor(orphanCandidates, contextChunksByFloor, budget) {
+    if (!orphanCandidates?.length) {
+        return { lines: [], l0Count: 0, contextPairsCount: 0 };
+    }
+
+    // 1. Group the candidates by floor (Map: floor -> chunks on that floor)
+    const byFloor = new Map();
+    for (const c of orphanCandidates) {
+        const arr = byFloor.get(c.floor) || [];
+        arr.push(c);
+        byFloor.set(c.floor, arr);
+    }
+
+    // 2. Within each floor, sort by chunkIdx ascending (a missing chunkIdx counts as 0)
+    for (const [, chunks] of byFloor) {
+        chunks.sort((a, b) => (a.chunkIdx ?? 0) - (b.chunkIdx ?? 0));
+    }
+
+    // 3. Assemble floor by floor, in ascending floor order
+    const floorsSorted = Array.from(byFloor.keys()).sort((a, b) => a - b);
+
+    const lines = [];
+    let l0Count = 0;
+    let contextPairsCount = 0;
+
+    for (const floor of floorsSorted) {
+        const chunks = byFloor.get(floor);
+        if (!chunks?.length) continue;
+
+        // Split the floor into L0 chunks (isL0 truthy) and L1 chunks (everything else)
+        const l0Chunks = chunks.filter(c => c.isL0);
+        const l1Chunks = chunks.filter(c => !c.isL0);
+
+        // L0 chunks are emitted directly (no context pairing); a failed budget push returns the partial result
+        for (const c of l0Chunks) {
+            const line = formatChunkFullLine(c);
+            if (!pushWithBudget(lines, line, budget)) {
+                return { lines, l0Count, contextPairsCount };
+            }
+            l0Count++;
+        }
+
+        // L1 chunks are handled once per floor: one context chunk, picked via the first L1 chunk, is shared by all of them
+        if (l1Chunks.length > 0) {
+            const firstChunk = l1Chunks[0];
+            const pairFloor = getContextFloor(firstChunk);
+            const pairCandidates = contextChunksByFloor.get(pairFloor) || [];
+            const contextChunk = pickContextChunk(pairCandidates, firstChunk);
+
+            // A context chunk from an earlier floor is emitted before this floor's chunks
+            if (contextChunk && contextChunk.floor < floor) {
+                const contextLine = formatContextChunkLine(contextChunk, true);
+                if (!pushWithBudget(lines, contextLine, budget)) {
+                    return { lines, l0Count, contextPairsCount };
+                }
+                contextPairsCount++;
+            }
+
+            // Emit every L1 chunk of this floor
+            for (const c of l1Chunks) {
+                const line = formatChunkFullLine(c);
+                if (!pushWithBudget(lines, line, budget)) {
+                    return { lines, l0Count, contextPairsCount };
+                }
+            }
+
+            // A context chunk from a later floor is emitted after this floor's chunks
+            if (contextChunk && contextChunk.floor > floor) {
+                const contextLine = formatContextChunkLine(contextChunk, false);
+                if (!pushWithBudget(lines, contextLine, budget)) {
+                    return { lines, l0Count, contextPairsCount };
+                }
+                contextPairsCount++;
+            }
+        }
+    }
+
+    return { lines, l0Count, contextPairsCount };
+}
+
 // ─────────────────────────────────────────────────────────────────────────────
 // 非向量模式
 // ─────────────────────────────────────────────────────────────────────────────

-/**
- * 构建非向量模式的 prompt
- * @param {object} store - summary store
- * @returns {string}
- */
 function buildNonVectorPrompt(store) {
    const data = store.json || {};
    const sections = [];

-    // L1 facts（非向量模式不做分层过滤，全量注入）
    const allFacts = getFacts();
    const factLines = allFacts
        .filter(f => !f.retracted)
@@ -494,10 +414,6 @@ function buildNonVectorPrompt(store) {
    );
 }

-/**
- * 构建非向量模式的注入文本
- * @returns {string}
- */
 export function buildNonVectorPromptText() {
    if (!getSettings().storySummary?.enabled) {
        return "";
@@ -524,16 +440,6 @@ export function buildNonVectorPromptText() {
 // 向量模式：预算装配
 // ─────────────────────────────────────────────────────────────────────────────

-/**
- * 构建向量模式的 prompt
- * @param {object} store - summary store
- * @param {object} recallResult - 召回结果
- * @param {Map} causalById - 因果映射
- * @param {Array} focusEntities - 焦点实体
- * @param {object} meta - 元数据
- * @param {object} metrics - 指标对象
- * @returns {Promise<object>} {promptText, injectionLogText, injectionStats, metrics}
- */
 async function buildVectorPrompt(store, recallResult, causalById, focusEntities = [], meta = null, metrics = null) {
    const T_Start = performance.now();

@@ -541,7 +447,6 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
    const data = store.json || {};
    const total = { used: 0, max: MAIN_BUDGET_MAX };

-    // 预装配容器
    const assembled = {
        facts: { lines: [], tokens: 0 },
        arcs: { lines: [], tokens: 0 },
@@ -573,7 +478,7 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
    };

    // ═══════════════════════════════════════════════════════════════════════
-    // [优先级 1] 世界约束 - 最高优先级（带分层过滤）
+    // [优先级 1] 世界约束
    // ═══════════════════════════════════════════════════════════════════════

    const T_L1_Start = performance.now();
@@ -582,7 +487,6 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
    const knownCharacters = getKnownCharacters(store);
    const factLines = formatFactsForInjection(allFacts, focusEntities, knownCharacters);

-    // METRICS: L1 指标
    if (metrics) {
        metrics.l1.factsTotal = allFacts.length;
        metrics.l1.factsFiltered = allFacts.length - factLines.length;
@@ -599,7 +503,6 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
        injectionStats.facts.tokens = l1Budget.used;
        injectionStats.facts.filtered = allFacts.length - factLines.length;

-        // METRICS
        if (metrics) {
            metrics.l1.factsInjected = assembled.facts.lines.length;
            metrics.l1.tokens = l1Budget.used;
@@ -613,7 +516,7 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
    }

    // ═══════════════════════════════════════════════════════════════════════
-    // [优先级 2] 人物弧光 - 预留预算
+    // [优先级 2] 人物弧光
    // ═══════════════════════════════════════════════════════════════════════

    if (data.arcs?.length && total.used < total.max) {
@@ -652,13 +555,6 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
    const chunks = recallResult?.chunks || [];
    const usedChunkIds = new Set();

-    /**
-     * 为事件选择最佳证据 chunk
-     * @param {object} eventObj - 事件对象
-     * @returns {object|null} 最佳 chunk
-     */
-
-    // 优先 L0 虚拟 chunk，否则按 chunkIdx 选第一个
    function pickBestChunkForEvent(eventObj) {
        const range = parseFloorRange(eventObj?.summary);
        if (!range) return null;
@@ -671,23 +567,14 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
            if (!best) {
                best = c;
            } else if (c.isL0 && !best.isL0) {
-                // L0 优先
                best = c;
            } else if (c.isL0 === best.isL0 && (c.chunkIdx ?? 0) < (best.chunkIdx ?? 0)) {
-                // 同类型按 chunkIdx 选靠前的
                best = c;
            }
        }
        return best;
    }

-    /**
-     * 格式化带证据的事件
-     * @param {object} e - 事件召回项
-     * @param {number} idx - 索引
-     * @param {object|null} chunk - 证据 chunk
-     * @returns {string}
-     */
    function formatEventWithEvidence(e, idx, chunk) {
        const ev = e.event || {};
        const time = ev.timeLabel || "";
@@ -775,7 +662,6 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
        });
    }

-    // 重排
    selectedDirect.sort((a, b) => getEventSortKey(a.event) - getEventSortKey(b.event));
    selectedSimilar.sort((a, b) => getEventSortKey(a.event) - getEventSortKey(b.event));

@@ -829,47 +715,22 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
    }

    if (orphanCandidates.length && total.used < total.max) {
-        const orphans = orphanCandidates
-            .sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0)));
-
        const l1Budget = { used: 0, max: Math.min(ORPHAN_MAX, total.max - total.used) };
-        let l0Count = 0;
-        let contextPairsCount = 0;

-        for (const c of orphans) {
-            if (c.isL0) {
-                const line = formatChunkFullLine(c);
-                if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) break;
-                injectionStats.orphans.injected++;
-                l0Count++;
-                continue;
-            }
-
-            const pairFloor = getContextFloor(c);
-            const pairCandidates = contextChunksByFloor.get(pairFloor) || [];
-            const contextChunk = pickContextChunk(pairCandidates, c);
-
-            const formattedLines = formatChunkWithContext(c, contextChunk);
-
-            let allAdded = true;
-            for (const line of formattedLines) {
-                if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) {
-                    allAdded = false;
-                    break;
-                }
-            }
-
-            if (!allAdded) break;
-
-            injectionStats.orphans.injected++;
-            if (contextChunk) contextPairsCount++;
-        }
+        const result = assembleOrphansByFloor(
+            orphanCandidates.sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0))),
+            contextChunksByFloor,
+            l1Budget
+        );

+        assembled.orphans.lines = result.lines;
        assembled.orphans.tokens = l1Budget.used;
        total.used += l1Budget.used;
+
+        injectionStats.orphans.injected = result.lines.length;
        injectionStats.orphans.tokens = l1Budget.used;
-        injectionStats.orphans.l0Count = l0Count;
-        injectionStats.orphans.contextPairs = contextPairsCount;
+        injectionStats.orphans.l0Count = result.l0Count;
+        injectionStats.orphans.contextPairs = result.contextPairsCount;
    }

    // ═══════════════════════════════════════════════════════════════════════
@@ -891,7 +752,6 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
            if (pairFloor >= 0) recentContextFloors.add(pairFloor);
        }

-        let recentContextChunksByFloor = new Map();
        if (chatId && recentContextFloors.size > 0) {
            const newFloors = Array.from(recentContextFloors).filter(f => !contextChunksByFloor.has(f));
            if (newFloors.length > 0) {
@@ -907,47 +767,25 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
                    xbLog.warn(MODULE_ID, "获取近期配对chunks失败", e);
                }
            }
-            recentContextChunksByFloor = contextChunksByFloor;
        }

-        const recentOrphans = recentOrphanCandidates
-            .sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0)));
+        if (recentOrphanCandidates.length) {
+            const recentBudget = { used: 0, max: RECENT_ORPHAN_MAX };

-        const recentBudget = { used: 0, max: RECENT_ORPHAN_MAX };
-        let recentContextPairsCount = 0;
+            const result = assembleOrphansByFloor(
+                recentOrphanCandidates.sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0))),
+                contextChunksByFloor,
+                recentBudget
+            );

-        for (const c of recentOrphans) {
-            if (c.isL0) {
-                const line = formatChunkFullLine(c);
-                if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) break;
-                recentOrphanStats.injected++;
-                continue;
-            }
+            assembled.recentOrphans.lines = result.lines;
+            assembled.recentOrphans.tokens = recentBudget.used;

-            const pairFloor = getContextFloor(c);
-            const pairCandidates = recentContextChunksByFloor.get(pairFloor) || [];
-            const contextChunk = pickContextChunk(pairCandidates, c);
-
-            const formattedLines = formatChunkWithContext(c, contextChunk);
-
-            let allAdded = true;
-            for (const line of formattedLines) {
-                if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) {
-                    allAdded = false;
-                    break;
-                }
-            }
-
-            if (!allAdded) break;
-
-            recentOrphanStats.injected++;
-            if (contextChunk) recentContextPairsCount++;
+            recentOrphanStats.injected = result.lines.length;
+            recentOrphanStats.tokens = recentBudget.used;
+            recentOrphanStats.floorRange = `${recentStart + 1}~${recentEnd + 1}楼`;
+            recentOrphanStats.contextPairs = result.contextPairsCount;
        }
-
-        assembled.recentOrphans.tokens = recentBudget.used;
-        recentOrphanStats.tokens = recentBudget.used;
-        recentOrphanStats.floorRange = `${recentStart + 1}~${recentEnd + 1}楼`;
-        recentOrphanStats.contextPairs = recentContextPairsCount;
    }

    // ═══════════════════════════════════════════════════════════════════════
@@ -990,9 +828,7 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
        `<剧情记忆>\n\n${sections.join("\n\n")}\n\n</剧情记忆>\n` +
        `${buildPostscript()}`;

-    // METRICS: 更新 L4 和 Budget 指标
    if (metrics) {
-        // L4 指标
        metrics.l4.sectionsIncluded = [];
        if (assembled.facts.lines.length) metrics.l4.sectionsIncluded.push('constraints');
        if (assembled.events.direct.length) metrics.l4.sectionsIncluded.push('direct_events');
@@ -1004,7 +840,6 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
        metrics.l4.formattingTime = Math.round(performance.now() - T_L4_Start);
        metrics.timing.l4Formatting = metrics.l4.formattingTime;

-        // Budget 指标
        metrics.budget.total = total.used + (assembled.recentOrphans.tokens || 0);
        metrics.budget.limit = TOTAL_BUDGET_MAX;
        metrics.budget.utilization = Math.round(metrics.budget.total / TOTAL_BUDGET_MAX * 100);
@@ -1016,13 +851,11 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
            arcs: assembled.arcs.tokens,
        };

-        // L3 额外指标
        metrics.l3.tokens = injectionStats.orphans.tokens + (recentOrphanStats.tokens || 0);
        metrics.l3.contextPairsAdded = injectionStats.orphans.contextPairs + recentOrphanStats.contextPairs;
        metrics.l3.assemblyTime = Math.round(performance.now() - T_Start - (metrics.timing.l1Constraints || 0) - metrics.l4.formattingTime);
        metrics.timing.l3Assembly = metrics.l3.assemblyTime;

-        // 质量指标
        const totalFacts = allFacts.length;
        metrics.quality.constraintCoverage = totalFacts > 0
            ? Math.round(assembled.facts.lines.length / totalFacts * 100)
@@ -1035,7 +868,6 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
            ? Math.round(chunksWithEvents / totalChunks * 100)
            : 0;

-        // 检测问题
        metrics.quality.potentialIssues = detectIssues(metrics);
    }

@@ -1046,13 +878,6 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
 // 因果证据补充
 // ─────────────────────────────────────────────────────────────────────────────

-/**
- * 为因果事件附加证据 chunk
- * @param {Array} causalEvents - 因果事件列表
- * @param {Map} eventVectorMap - 事件向量映射
- * @param {Map} chunkVectorMap - chunk 向量映射
- * @param {Map} chunksMap - chunk 映射
- */
 async function attachEvidenceToCausalEvents(causalEvents, eventVectorMap, chunkVectorMap, chunksMap) {
    for (const c of causalEvents) {
        c._evidenceChunk = null;
@@ -1100,12 +925,6 @@ async function attachEvidenceToCausalEvents(causalEvents, eventVectorMap, chunkV
 // 向量模式：召回 + 注入
 // ─────────────────────────────────────────────────────────────────────────────

-/**
- * 构建向量模式的注入文本
- * @param {boolean} excludeLastAi - 是否排除最后一条 AI 消息
- * @param {object} hooks - 钩子 {postToFrame, echo, pendingUserMessage}
- * @returns {Promise<object>} {text, logText}
- */
 export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) {
    const { postToFrame = null, echo = null, pendingUserMessage = null } = hooks;

@@ -1156,7 +975,6 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) {
            metrics: recallResult?.metrics || null,
        };

-        // 给因果事件挂证据
        const causalEvents = recallResult.causalEvents || [];
        if (causalEvents.length > 0) {
            if (chatId) {
@@ -1228,7 +1046,6 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) {
        return { text: "", logText: "\n[Vector Recall Empty]\nNo recall candidates / vectors not ready.\n" };
    }

-    // 拼装向量 prompt，传入 focusEntities 和 metrics
    const { promptText, metrics: promptMetrics } = await buildVectorPrompt(
        store,
        recallResult,
@@ -1238,16 +1055,13 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) {
        recallResult?.metrics || null
    );

-    // wrapper
    const cfg = getSummaryPanelConfig();
    let finalText = String(promptText || "");
    if (cfg.trigger?.wrapperHead) finalText = cfg.trigger.wrapperHead + "\n" + finalText;
    if (cfg.trigger?.wrapperTail) finalText = finalText + "\n" + cfg.trigger.wrapperTail;

-    // METRICS: 生成完整的指标日志
    const metricsLogText = promptMetrics ? formatMetricsLog(promptMetrics) : '';

-    // 发给 iframe
    if (postToFrame) {
        postToFrame({ type: "RECALL_LOG", text: metricsLogText });
    }
--- a/modules/story-summary/story-summary.css
+++ b/modules/story-summary/story-summary.css
@@ -1455,23 +1455,25 @@ h1 span {
 }

 #recall-log-content {
-    flex: 1;
-    min-height: 0;
-    white-space: pre-wrap;
-    font-family: 'SF Mono', Monaco, Consolas, 'Courier New', monospace;
+    font-family: 'Consolas', 'Monaco', 'SF Mono', monospace;
    font-size: 12px;
    line-height: 1.6;
-    background: var(--bg3);
-    padding: 16px;
-    border-radius: 4px;
-    overflow-y: auto;
+    color: #e8e8e8;
+    white-space: pre-wrap !important;
+    overflow-x: hidden !important;
+    word-break: break-word;
+    overflow-wrap: break-word;
+    -webkit-font-smoothing: antialiased;
+    -moz-osx-font-smoothing: grayscale;
 }

 .recall-empty {
-    color: var(--txt3);
+    color: #999;
    text-align: center;
    padding: 40px;
    font-style: italic;
+    font-size: .8125rem;
+    line-height: 1.8;
 }

 /* 移动端适配 */
@@ -1483,9 +1485,11 @@ h1 span {
        border-radius: 0;
    }

+    .debug-log-viewer,
    #recall-log-content {
        font-size: 11px;
        padding: 12px;
+        line-height: 1.5;
    }
 }

@@ -2732,14 +2736,18 @@ h1 span {
    margin-bottom: 4px;
 }

+/* ═══════════════════════════════════════════════════════════════════════════
+   Recall Log / Debug Log
+   ═══════════════════════════════════════════════════════════════════════════ */
+
 .debug-log-viewer {
-    background: #1e1e1e;
-    color: #d4d4d4;
+    background: #1a1a1a;
+    color: #e0e0e0;
    padding: 16px;
    border-radius: 8px;
-    font-family: 'Consolas', 'Monaco', monospace;
+    font-family: 'Consolas', 'Monaco', 'SF Mono', monospace;
    font-size: 12px;
-    line-height: 1.5;
+    line-height: 1.6;
    max-height: 60vh;
    overflow-y: auto;
    overflow-x: hidden;
@@ -2749,7 +2757,7 @@ h1 span {
 }

 .recall-empty {
-    color: var(--txt3);
+    color: #999;
    text-align: center;
    padding: 40px;
    font-style: italic;
@@ -2884,15 +2892,6 @@ h1 span {
   Metrics Log Styling
   ═══════════════════════════════════════════════════════════════════════════ */

-#recall-log-content {
-    font-family: 'SF Mono', Monaco, Consolas, 'Courier New', monospace;
-    font-size: 11px;
-    line-height: 1.5;
-    white-space: pre;
-    overflow-x: auto;
-    tab-size: 4;
-}
-
 #recall-log-content .metric-warn {
    color: #f59e0b;
 }
--- a/modules/story-summary/vector/llm/llm-service.js
+++ b/modules/story-summary/vector/llm/llm-service.js
@@ -29,7 +29,7 @@ function b64UrlEncode(str) {

 /**
 * 统一LLM调用 - 走酒馆后端（非流式）
- * 修复：assistant prefill 用 bottomassistant 参数传递
+ * assistant prefill 用 bottomassistant 参数传递
 */
 export async function callLLM(messages, options = {}) {
    const {
@@ -46,7 +46,7 @@ export async function callLLM(messages, options = {}) {
        throw new Error('L0 requires siliconflow API key');
    }

-    // ★ 关键修复：分离 assistant prefill
+    // 分离 assistant prefill
    let topMessages = [...messages];
    let assistantPrefill = '';

@@ -70,6 +70,10 @@ export async function callLLM(messages, options = {}) {
        apipassword: apiKey,
        model: DEFAULT_L0_MODEL,
    };
+    const isQwen3 = String(DEFAULT_L0_MODEL || '').includes('Qwen3');
+    if (isQwen3) {
+        args.enable_thinking = 'false';
+    }

    // ★ 用 bottomassistant 参数传递 prefill
    if (assistantPrefill) {
--- a/modules/story-summary/vector/retrieval/metrics.js
+++ b/modules/story-summary/vector/retrieval/metrics.js
@@ -48,17 +48,15 @@ export function createMetrics() {
        // L3 Evidence Assembly
        l3: {
            floorsFromL0: 0,
-            // 候选规模（rerank 前）
+            l1Total: 0,
+            l1AfterCoarse: 0,
            chunksInRange: 0,
            chunksInRangeByType: { l0Virtual: 0, l1Real: 0 },
-            // 最终注入（rerank + sparse 后）
            chunksSelected: 0,
            chunksSelectedByType: { l0Virtual: 0, l1Real: 0 },
-            // 上下文配对
            contextPairsAdded: 0,
            tokens: 0,
            assemblyTime: 0,
-            // Rerank 相关
            rerankApplied: false,
            beforeRerank: 0,
            afterRerank: 0,
@@ -80,7 +78,6 @@ export function createMetrics() {
            breakdown: {
                constraints: 0,
                events: 0,
-                entities: 0,
                chunks: 0,
                recentOrphans: 0,
                arcs: 0,
@@ -204,8 +201,15 @@ export function formatMetricsLog(metrics) {
    lines.push('[L3] Evidence Assembly');
    lines.push(`├─ floors_from_l0: ${m.l3.floorsFromL0}`);

-    // 候选规模
-    lines.push(`├─ chunks_in_range: ${m.l3.chunksInRange}`);
+    // L1 粗筛信息
+    if (m.l3.l1Total > 0) {
+        lines.push(`├─ l1_coarse_filter:`);
+        lines.push(`│   ├─ total: ${m.l3.l1Total}`);
+        lines.push(`│   ├─ after: ${m.l3.l1AfterCoarse}`);
+        lines.push(`│   └─ filtered: ${m.l3.l1Total - m.l3.l1AfterCoarse}`);
+    }
+
+    lines.push(`├─ chunks_merged: ${m.l3.chunksInRange}`);
    if (m.l3.chunksInRangeByType) {
        const cir = m.l3.chunksInRangeByType;
        lines.push(`│   ├─ l0_virtual: ${cir.l0Virtual || 0}`);
@@ -226,7 +230,6 @@ export function formatMetricsLog(metrics) {
        lines.push(`├─ rerank_applied: false`);
    }

-    // 最终注入规模
    lines.push(`├─ chunks_selected: ${m.l3.chunksSelected}`);
    if (m.l3.chunksSelectedByType) {
        const cs = m.l3.chunksSelectedByType;
@@ -341,6 +344,14 @@ export function detectIssues(metrics) {
        issues.push('L0 atoms not matched - may need to generate anchors');
    }

+    // L1 粗筛问题
+    if (m.l3.l1Total > 0 && m.l3.l1AfterCoarse > 0) {
+        const coarseFilterRatio = 1 - (m.l3.l1AfterCoarse / m.l3.l1Total);
+        if (coarseFilterRatio > 0.9) {
+            issues.push(`Very high L1 coarse filter ratio (${(coarseFilterRatio * 100).toFixed(0)}%) - query may be too specific`);
+        }
+    }
+
    // Rerank 相关问题
    if (m.l3.rerankApplied) {
        if (m.l3.beforeRerank > 0 && m.l3.afterRerank > 0) {
@@ -365,7 +376,7 @@ export function detectIssues(metrics) {
        }
    }

-    // 证据密度问题（基于 selected 的构成）
+    // 证据密度问题
    if (m.l3.chunksSelected > 0 && m.l3.chunksSelectedByType) {
        const l1Real = m.l3.chunksSelectedByType.l1Real || 0;
        const density = l1Real / m.l3.chunksSelected;
--- a/modules/story-summary/vector/retrieval/recall.js
+++ b/modules/story-summary/vector/retrieval/recall.js
@@ -1,13 +1,8 @@
 // ═══════════════════════════════════════════════════════════════════════════
-// Story Summary - Recall Engine (v3 - L0 作为 L3 索引 + Rerank 精排)
-//
-// 架构：
-// - Query Expansion → L0（主索引）→ L3（按楼层拉取）→ Rerank（精排）
-// - Query Expansion → L2（独立检索）
-// - L0 和 L2 不在同一抽象层，分开处理
+// Story Summary - Recall Engine (v4 - L0 无上限 + L1 粗筛)
 // ═══════════════════════════════════════════════════════════════════════════

-import { getAllEventVectors, getChunksByFloors, getMeta } from '../storage/chunk-store.js';
+import { getAllEventVectors, getChunksByFloors, getMeta, getChunkVectorsByIds } from '../storage/chunk-store.js';
 import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js';
 import { getEngineFingerprint, embed } from '../utils/embedder.js';
 import { xbLog } from '../../../../core/debug-core.js';
@@ -27,9 +22,11 @@ const CONFIG = {
    // Query Expansion
    QUERY_EXPANSION_TIMEOUT: 6000,

-    // L0 配置
-    L0_MAX_RESULTS: 30,
-    L0_MIN_SIMILARITY: 0.50,
+    // L0 配置 - 去掉硬上限，提高阈值
+    L0_MIN_SIMILARITY: 0.58,
+
+    // L1 粗筛配置
+    L1_MAX_CANDIDATES: 100,

    // L2 配置
    L2_CANDIDATE_MAX: 100,
@@ -37,11 +34,8 @@ const CONFIG = {
    L2_MIN_SIMILARITY: 0.55,
    L2_MMR_LAMBDA: 0.72,

-    // L3 配置（从 L0 楼层拉取）
-    L3_MAX_CHUNKS_PER_FLOOR: 3,
-    L3_MAX_TOTAL_CHUNKS: 60,
-
    // Rerank 配置
+    RERANK_THRESHOLD: 80,
    RERANK_TOP_N: 50,
    RERANK_MIN_SCORE: 0.15,

@@ -49,6 +43,8 @@ const CONFIG = {
    CAUSAL_CHAIN_MAX_DEPTH: 10,
    CAUSAL_INJECT_MAX: 30,
 };
+
+// ═══════════════════════════════════════════════════════════════════════════
 // 工具函数
 // ═══════════════════════════════════════════════════════════════════════════

@@ -75,12 +71,6 @@ function cleanForRecall(text) {
    return filterText(text).replace(/\[tts:[^\]]*\]/gi, '').trim();
 }

-/**
- * 从 focusEntities 中移除用户名
- * @param {Array} focusEntities - 焦点实体
- * @param {string} userName - 用户名
- * @returns {Array} 过滤后的实体
- */
 function removeUserNameFromFocus(focusEntities, userName) {
    const u = normalize(userName);
    if (!u) return Array.isArray(focusEntities) ? focusEntities : [];
@@ -91,28 +81,17 @@ function removeUserNameFromFocus(focusEntities, userName) {
        .filter(e => normalize(e) !== u);
 }

-/**
- * 构建用于 Rerank 的查询文本
- * 综合 Query Expansion 结果和最近对话
- * @param {object} expansion - Query Expansion 结果
- * @param {Array} lastMessages - 最近的消息
- * @param {string} pendingUserMessage - 待发送的用户消息
- * @returns {string} Rerank 用的查询文本
- */
 function buildRerankQuery(expansion, lastMessages, pendingUserMessage) {
    const parts = [];

-    // 1. focus entities
    if (expansion?.focus?.length) {
        parts.push(expansion.focus.join(' '));
    }

-    // 2. DSL queries（取前3个）
    if (expansion?.queries?.length) {
        parts.push(...expansion.queries.slice(0, 3));
    }

-    // 3. 最近对话的关键内容
    const recentTexts = (lastMessages || [])
        .slice(-2)
        .map(m => cleanForRecall(m.mes || '').slice(0, 150))
@@ -122,7 +101,6 @@ function buildRerankQuery(expansion, lastMessages, pendingUserMessage) {
        parts.push(...recentTexts);
    }

-    // 4. 待发送消息
    if (pendingUserMessage) {
        parts.push(cleanForRecall(pendingUserMessage).slice(0, 200));
    }
@@ -134,15 +112,6 @@ function buildRerankQuery(expansion, lastMessages, pendingUserMessage) {
 // MMR 选择
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * MMR 多样性选择
- * @param {Array} candidates - 候选项
- * @param {number} k - 选择数量
- * @param {number} lambda - MMR 参数
- * @param {Function} getVector - 获取向量函数
- * @param {Function} getScore - 获取分数函数
- * @returns {Array} 选中的项
- */
 function mmrSelect(candidates, k, lambda, getVector, getScore) {
    const selected = [];
    const ids = new Set();
@@ -183,23 +152,15 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
 }

 // ═══════════════════════════════════════════════════════════════════════════
-// L0 检索：Query → L0 → 楼层集合
+// L0 检索：无上限，阈值过滤
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * L0 向量检索
- * @param {Array} queryVector - 查询向量
- * @param {object} vectorConfig - 向量配置
- * @param {object} metrics - 指标对象
- * @returns {Promise<object>} {atoms, floors}
- */
 async function searchL0(queryVector, vectorConfig, metrics) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length) {
        return { atoms: [], floors: new Set() };
    }

-    // 检查 fingerprint
    const meta = await getMeta(chatId);
    const fp = getEngineFingerprint(vectorConfig);
    if (meta.fingerprint && meta.fingerprint !== fp) {
@@ -207,17 +168,15 @@ async function searchL0(queryVector, vectorConfig, metrics) {
        return { atoms: [], floors: new Set() };
    }

-    // 获取向量
    const stateVectors = await getAllStateVectors(chatId);
    if (!stateVectors.length) {
        return { atoms: [], floors: new Set() };
    }

-    // 获取 atoms 元数据
    const atomsList = getStateAtoms();
    const atomMap = new Map(atomsList.map(a => [a.atomId, a]));

-    // 计算相似度
+    // ★ 只按阈值过滤，不设硬上限
    const scored = stateVectors
        .map(sv => {
            const atom = atomMap.get(sv.atomId);
@@ -232,13 +191,10 @@ async function searchL0(queryVector, vectorConfig, metrics) {
        })
        .filter(Boolean)
        .filter(s => s.similarity >= CONFIG.L0_MIN_SIMILARITY)
-        .sort((a, b) => b.similarity - a.similarity)
-        .slice(0, CONFIG.L0_MAX_RESULTS);
+        .sort((a, b) => b.similarity - a.similarity);

-    // 收集楼层
    const floors = new Set(scored.map(s => s.floor));

-    // 更新 metrics
    if (metrics) {
        metrics.l0.atomsMatched = scored.length;
        metrics.l0.floorsHit = floors.size;
@@ -253,48 +209,9 @@ async function searchL0(queryVector, vectorConfig, metrics) {
 }

 // ═══════════════════════════════════════════════════════════════════════════
-// L3 拉取：L0 楼层 → Chunks（带 Rerank 精排）
+// 统计 chunks 类型构成
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * 按楼层稀疏去重
- * 每楼层最多保留 limit 个 chunk，优先保留分数高的
- * @param {Array} chunks - chunk 列表（假设已按分数排序）
- * @param {number} limit - 每楼层上限
- * @returns {Array} 去重后的 chunks
- */
-function sparseByFloor(chunks, limit = 3) {
-    const byFloor = new Map();
-
-    for (const c of chunks) {
-        const arr = byFloor.get(c.floor) || [];
-        if (arr.length < limit) {
-            arr.push(c);
-            byFloor.set(c.floor, arr);
-        }
-    }
-
-    const result = [];
-    const seen = new Set();
-
-    for (const c of chunks) {
-        if (!seen.has(c.chunkId)) {
-            const arr = byFloor.get(c.floor);
-            if (arr?.includes(c)) {
-                result.push(c);
-                seen.add(c.chunkId);
-            }
-        }
-    }
-
-    return result;
-}
-
-/**
- * 统计 chunks 的类型构成
- * @param {Array} chunks - chunk 列表
- * @returns {object} {l0Virtual, l1Real}
- */
 function countChunksByType(chunks) {
    let l0Virtual = 0;
    let l1Real = 0;
@@ -310,15 +227,11 @@ function countChunksByType(chunks) {
    return { l0Virtual, l1Real };
 }

-/**
- * 从 L0 命中楼层拉取 chunks，并用 Reranker 精排
- * @param {Set} l0Floors - L0 命中的楼层
- * @param {Array} l0Atoms - L0 atoms（用于构建虚拟 chunks）
- * @param {string} queryText - 查询文本（用于 rerank）
- * @param {object} metrics - 指标对象
- * @returns {Promise<Array>} chunks 列表
- */
-async function getChunksFromL0Floors(l0Floors, l0Atoms, queryText, metrics) {
+// ═══════════════════════════════════════════════════════════════════════════
+// L3 拉取 + L1 粗筛 + Rerank
+// ═══════════════════════════════════════════════════════════════════════════
+
+async function getChunksFromL0Floors(l0Floors, l0Atoms, queryVector, queryText, metrics) {
    const { chatId } = getContext();
    if (!chatId || !l0Floors.size) {
        return [];
@@ -326,15 +239,7 @@ async function getChunksFromL0Floors(l0Floors, l0Atoms, queryText, metrics) {

    const floorArray = Array.from(l0Floors);

-    // 从 DB 拉取 chunks
-    let dbChunks = [];
-    try {
-        dbChunks = await getChunksByFloors(chatId, floorArray);
-    } catch (e) {
-        xbLog.warn(MODULE_ID, '从 DB 拉取 chunks 失败', e);
-    }
-
-    // 构建 L0 虚拟 chunks
+    // 1. 构建 L0 虚拟 chunks
    const l0VirtualChunks = (l0Atoms || []).map(a => ({
        chunkId: `state-${a.atomId}`,
        floor: a.floor,
@@ -347,40 +252,69 @@ async function getChunksFromL0Floors(l0Floors, l0Atoms, queryText, metrics) {
        _atom: a.atom,
    }));

-    // 合并所有 chunks
-    const allChunks = [...l0VirtualChunks, ...dbChunks.map(c => ({
-        ...c,
-        isL0: false,
-        similarity: 0.5,
-    }))];
+    // 2. 拉取 L1 chunks
+    let dbChunks = [];
+    try {
+        dbChunks = await getChunksByFloors(chatId, floorArray);
+    } catch (e) {
+        xbLog.warn(MODULE_ID, '从 DB 拉取 chunks 失败', e);
+    }

-    // ★ 更新 metrics - 候选规模（rerank 前）
+    // 3. ★ L1 向量粗筛
+    let l1Filtered = [];
+    if (dbChunks.length > 0 && queryVector?.length) {
+        const chunkIds = dbChunks.map(c => c.chunkId);
+        let chunkVectors = [];
+        try {
+            chunkVectors = await getChunkVectorsByIds(chatId, chunkIds);
+        } catch (e) {
+            xbLog.warn(MODULE_ID, 'L1 向量获取失败', e);
+        }
+        
+        const vectorMap = new Map(chunkVectors.map(v => [v.chunkId, v.vector]));
+
+        l1Filtered = dbChunks
+            .map(c => {
+                const vec = vectorMap.get(c.chunkId);
+                if (!vec?.length) return null;
+
+                return {
+                    ...c,
+                    isL0: false,
+                    similarity: cosineSimilarity(queryVector, vec),
+                };
+            })
+            .filter(Boolean)
+            .sort((a, b) => b.similarity - a.similarity)
+            .slice(0, CONFIG.L1_MAX_CANDIDATES);
+    }
+
+    // 4. 合并
+    const allChunks = [...l0VirtualChunks, ...l1Filtered];
+
+    // ★ 更新 metrics
    if (metrics) {
        metrics.l3.floorsFromL0 = floorArray.length;
-        metrics.l3.chunksInRange = allChunks.length;
+        metrics.l3.l1Total = dbChunks.length;
+        metrics.l3.l1AfterCoarse = l1Filtered.length;
+        metrics.l3.chunksInRange = l0VirtualChunks.length + l1Filtered.length;
        metrics.l3.chunksInRangeByType = {
            l0Virtual: l0VirtualChunks.length,
-            l1Real: dbChunks.length,
+            l1Real: l1Filtered.length,
        };
    }

-    // 如果数量不超限，直接按楼层去重返回
-    if (allChunks.length <= CONFIG.L3_MAX_TOTAL_CHUNKS) {
-        allChunks.sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
-
-        const selected = sparseByFloor(allChunks, CONFIG.L3_MAX_CHUNKS_PER_FLOOR);
-
-        // ★ 更新 metrics - 最终注入规模
+    // 5. 是否需要 Rerank
+    if (allChunks.length <= CONFIG.RERANK_THRESHOLD) {
        if (metrics) {
            metrics.l3.rerankApplied = false;
-            metrics.l3.chunksSelected = selected.length;
-            metrics.l3.chunksSelectedByType = countChunksByType(selected);
+            metrics.l3.chunksSelected = allChunks.length;
+            metrics.l3.chunksSelectedByType = countChunksByType(allChunks);
        }
-
-        return selected;
+        return allChunks;
    }

-    // ★ Reranker 精排
+    // 6. Rerank 精排
    const T_Rerank_Start = performance.now();

    const reranked = await rerankChunks(queryText, allChunks, {
@@ -390,21 +324,16 @@ async function getChunksFromL0Floors(l0Floors, l0Atoms, queryText, metrics) {

    const rerankTime = Math.round(performance.now() - T_Rerank_Start);

-    // 按楼层稀疏去重
-    const selected = sparseByFloor(reranked, CONFIG.L3_MAX_CHUNKS_PER_FLOOR);
-
-    // ★ 更新 metrics
    if (metrics) {
        metrics.l3.rerankApplied = true;
        metrics.l3.beforeRerank = allChunks.length;
        metrics.l3.afterRerank = reranked.length;
-        metrics.l3.chunksSelected = selected.length;
-        metrics.l3.chunksSelectedByType = countChunksByType(selected);
+        metrics.l3.chunksSelected = reranked.length;
+        metrics.l3.chunksSelectedByType = countChunksByType(reranked);
        metrics.l3.rerankTime = rerankTime;
        metrics.timing.l3Rerank = rerankTime;

-        // rerank 分数分布（基于 selected）
-        const scores = selected.map(c => c._rerankScore || 0).filter(s => s > 0);
+        const scores = reranked.map(c => c._rerankScore || 0).filter(s => s > 0);
        if (scores.length > 0) {
            scores.sort((a, b) => a - b);
            metrics.l3.rerankScoreDistribution = {
@@ -415,31 +344,21 @@ async function getChunksFromL0Floors(l0Floors, l0Atoms, queryText, metrics) {
        }
    }

-    xbLog.info(MODULE_ID, `L3 Rerank: ${allChunks.length} → ${reranked.length} → ${selected.length} (${rerankTime}ms)`);
+    xbLog.info(MODULE_ID, `L3: ${dbChunks.length} L1 → ${l1Filtered.length} 粗筛 → ${reranked.length} Rerank (${rerankTime}ms)`);

-    return selected;
+    return reranked;
 }

 // ═══════════════════════════════════════════════════════════════════════════
-// L2 检索：Query → Events（独立）
+// L2 检索（保持不变）
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * L2 事件向量检索
- * @param {Array} queryVector - 查询向量
- * @param {Array} allEvents - 所有事件
- * @param {object} vectorConfig - 向量配置
- * @param {Array} focusEntities - 焦点实体（用于实体过滤）
- * @param {object} metrics - 指标对象
- * @returns {Promise<Array>} 事件列表
- */
 async function searchL2Events(queryVector, allEvents, vectorConfig, focusEntities, metrics) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length || !allEvents?.length) {
        return [];
    }

-    // 检查 fingerprint
    const meta = await getMeta(chatId);
    const fp = getEngineFingerprint(vectorConfig);
    if (meta.fingerprint && meta.fingerprint !== fp) {
@@ -447,7 +366,6 @@ async function searchL2Events(queryVector, allEvents, vectorConfig, focusEntitie
        return [];
    }

-    // 获取事件向量
    const eventVectors = await getAllEventVectors(chatId);
    const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));

@@ -455,19 +373,15 @@ async function searchL2Events(queryVector, allEvents, vectorConfig, focusEntitie
        return [];
    }

-    // 实体匹配集合
    const focusSet = new Set((focusEntities || []).map(normalize));

-    // 计算相似度
    const scored = allEvents.map(event => {
        const v = vectorMap.get(event.id);
        const baseSim = v ? cosineSimilarity(queryVector, v) : 0;

-        // 实体命中检查
        const participants = (event.participants || []).map(p => normalize(p));
        const hasEntityMatch = participants.some(p => focusSet.has(p));

-        // 实体匹配加权
        const bonus = hasEntityMatch ? 0.05 : 0;

        return {
@@ -480,12 +394,10 @@ async function searchL2Events(queryVector, allEvents, vectorConfig, focusEntitie
        };
    });

-    // 更新 metrics
    if (metrics) {
        metrics.l2.eventsInStore = allEvents.length;
    }

-    // 阈值过滤
    let candidates = scored
        .filter(s => s.similarity >= CONFIG.L2_MIN_SIMILARITY)
        .sort((a, b) => b.similarity - a.similarity)
@@ -495,14 +407,11 @@ async function searchL2Events(queryVector, allEvents, vectorConfig, focusEntitie
        metrics.l2.eventsConsidered = candidates.length;
    }

-    // 实体过滤（可选）
    if (focusSet.size > 0) {
        const beforeFilter = candidates.length;

        candidates = candidates.filter(c => {
-            // 高相似度绕过
            if (c.similarity >= 0.85) return true;
-            // 有实体匹配的保留
            return c._hasEntityMatch;
        });

@@ -516,7 +425,6 @@ async function searchL2Events(queryVector, allEvents, vectorConfig, focusEntitie
        }
    }

-    // MMR 去重
    const selected = mmrSelect(
        candidates,
        CONFIG.L2_SELECT_MAX,
@@ -525,7 +433,6 @@ async function searchL2Events(queryVector, allEvents, vectorConfig, focusEntitie
        c => c.similarity
    );

-    // 统计召回类型
    let directCount = 0;
    let contextCount = 0;

@@ -542,7 +449,6 @@ async function searchL2Events(queryVector, allEvents, vectorConfig, focusEntitie
        };
    });

-    // 更新 metrics
    if (metrics) {
        metrics.l2.eventsSelected = results.length;
        metrics.l2.byRecallType = { direct: directCount, context: contextCount, causal: 0 };
@@ -553,14 +459,9 @@ async function searchL2Events(queryVector, allEvents, vectorConfig, focusEntitie
 }

 // ═══════════════════════════════════════════════════════════════════════════
-// 因果链追溯
+// 因果链追溯（保持不变）
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * 构建事件索引
- * @param {Array} allEvents - 所有事件
- * @returns {Map} 事件索引
- */
 function buildEventIndex(allEvents) {
    const map = new Map();
    for (const e of allEvents || []) {
@@ -569,13 +470,6 @@ function buildEventIndex(allEvents) {
    return map;
 }

-/**
- * 追溯因果祖先
- * @param {Array} recalledEvents - 召回的事件
- * @param {Map} eventIndex - 事件索引
- * @param {number} maxDepth - 最大深度
- * @returns {object} {results, maxDepth}
- */
 function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) {
    const out = new Map();
    const idRe = /^evt-\d+$/;
@@ -626,19 +520,11 @@ function traceCausalAncestors(recalledEvents, eventIndex, maxDepth = CONFIG.CAUS
 // 辅助函数
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * 获取最近的消息
- * @param {Array} chat - 聊天数组
- * @param {number} count - 消息数量
- * @param {boolean} excludeLastAi - 是否排除最后一条 AI 消息
- * @returns {Array} 消息列表
- */
 function getLastMessages(chat, count = 4, excludeLastAi = false) {
    if (!chat?.length) return [];

    let messages = [...chat];

-    // 排除最后一条 AI 消息（swipe/regenerate 场景）
    if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) {
        messages = messages.slice(0, -1);
    }
@@ -646,13 +532,6 @@ function getLastMessages(chat, count = 4, excludeLastAi = false) {
    return messages.slice(-count);
 }

-/**
- * 构建查询文本（降级用）
- * @param {Array} chat - 聊天数组
- * @param {number} count - 消息数量
- * @param {boolean} excludeLastAi - 是否排除最后一条 AI 消息
- * @returns {string} 查询文本
- */
 export function buildQueryText(chat, count = 2, excludeLastAi = false) {
    if (!chat?.length) return '';

@@ -672,14 +551,6 @@ export function buildQueryText(chat, count = 2, excludeLastAi = false) {
 // 主函数
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * 记忆召回主函数
- * @param {string} queryText - 查询文本（降级用）
- * @param {Array} allEvents - 所有事件
- * @param {object} vectorConfig - 向量配置
- * @param {object} options - 选项
- * @returns {Promise<object>} 召回结果
- */
 export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) {
    const T0 = performance.now();
    const { chat, name1 } = getContext();
@@ -698,7 +569,6 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =

    const T_QE_Start = performance.now();

-    // 获取最近对话
    const lastMessages = getLastMessages(chat, 4, excludeLastAi);

    let expansion = { focus: [], queries: [] };
@@ -712,14 +582,11 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
        xbLog.warn(MODULE_ID, 'Query Expansion 失败，降级使用原始文本', e);
    }

-    // 构建检索文本
    const searchText = buildSearchText(expansion);
    const finalSearchText = searchText || queryText || lastMessages.map(m => cleanForRecall(m.mes || '').slice(0, 200)).join(' ');

-    // focusEntities（移除用户名）
    const focusEntities = removeUserNameFromFocus(expansion.focus, name1);

-    // 更新 L0 metrics
    metrics.l0.needRecall = true;
    metrics.l0.focusEntities = focusEntities;
    metrics.l0.queries = expansion.queries || [];
@@ -746,7 +613,7 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
    }

    // ═══════════════════════════════════════════════════════════════════════
-    // Step 3: L0 检索 → L3 拉取（并行准备）
+    // Step 3: L0 检索
    // ═══════════════════════════════════════════════════════════════════════

    const T_L0_Start = performance.now();
@@ -756,15 +623,13 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
    metrics.timing.l0Search = Math.round(performance.now() - T_L0_Start);

    // ═══════════════════════════════════════════════════════════════════════
-    // Step 4: L3 从 L0 楼层拉取（带 Rerank）
+    // Step 4: L3 拉取 + L1 粗筛 + Rerank
    // ═══════════════════════════════════════════════════════════════════════

    const T_L3_Start = performance.now();

-    // 构建 rerank 用的查询文本
    const rerankQuery = buildRerankQuery(expansion, lastMessages, pendingUserMessage);
-
-    const chunks = await getChunksFromL0Floors(l0Floors, l0Atoms, rerankQuery, metrics);
+    const chunks = await getChunksFromL0Floors(l0Floors, l0Atoms, queryVector, rerankQuery, metrics);

    metrics.timing.l3Retrieval = Math.round(performance.now() - T_L3_Start);

@@ -796,7 +661,6 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
            chainFrom: x.chainFrom,
        }));

-    // 更新因果链 metrics
    if (metrics.l2.byRecallType) {
        metrics.l2.byRecallType.causal = causalEvents.length;
    }
@@ -809,16 +673,14 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =

    metrics.timing.total = Math.round(performance.now() - T0);

-    // 实体信息
    metrics.l2.entityNames = focusEntities;
    metrics.l2.entitiesLoaded = focusEntities.length;

-    // 日志
-    console.group('%c[Recall v3]', 'color: #7c3aed; font-weight: bold');
+    console.group('%c[Recall v4]', 'color: #7c3aed; font-weight: bold');
    console.log(`Elapsed: ${metrics.timing.total}ms`);
    console.log(`Query Expansion: focus=[${expansion.focus.join(', ')}]`);
    console.log(`L0: ${l0Atoms.length} atoms → ${l0Floors.size} floors`);
-    console.log(`L3: ${chunks.length} chunks (L0=${metrics.l3.chunksSelectedByType?.l0Virtual || 0}, DB=${metrics.l3.chunksSelectedByType?.l1Real || 0})`);
+    console.log(`L3: ${metrics.l3.l1Total || 0} L1 → ${metrics.l3.l1AfterCoarse || 0} 粗筛 → ${chunks.length} final`);
    if (metrics.l3.rerankApplied) {
        console.log(`L3 Rerank: ${metrics.l3.beforeRerank} → ${metrics.l3.afterRerank} (${metrics.l3.rerankTime}ms)`);
    }
--- a/modules/story-summary/vector/storage/chunk-store.js
+++ b/modules/story-summary/vector/storage/chunk-store.js
@@ -159,6 +159,20 @@ export async function getAllChunkVectors(chatId) {
    }));
 }

+export async function getChunkVectorsByIds(chatId, chunkIds) { // Fetch stored vectors for the given chunk ids within one chat.
+    if (!chatId || !chunkIds?.length) return []; // no chat id, or a missing/empty id list: resolve to [] without querying
+    // Build one [chatId, chunkId] key pair per requested id and match them via '[chatId+chunkId]' (presumably a compound index; confirm against the table schema).
+    const records = await chunkVectorsTable
+        .where('[chatId+chunkId]')
+        .anyOf(chunkIds.map(id => [chatId, id]))
+        .toArray();
+    // Only records returned by the query are mapped, so the result may be shorter than chunkIds; each entry is { chunkId, vector } with the stored vector passed through bufferToFloat32.
+    return records.map(r => ({
+        chunkId: r.chunkId,
+        vector: bufferToFloat32(r.vector),
+    }));
+}
+
 // ═══════════════════════════════════════════════════════════════════════════
 // EventVectors 表操作
 // ═══════════════════════════════════════════════════════════════════════════
--- a/modules/streaming-generation.js
+++ b/modules/streaming-generation.js
@@ -240,6 +240,9 @@ class StreamingGeneration {
                include_reasoning: oai_settings?.show_thoughts ?? true,
                reasoning_effort: oai_settings?.reasoning_effort || 'medium',
            };
+            if (baseOptions?.enable_thinking !== undefined) body.enable_thinking = baseOptions.enable_thinking;
+            if (baseOptions?.thinking_budget !== undefined) body.thinking_budget = baseOptions.thinking_budget;
+            if (baseOptions?.min_p !== undefined) body.min_p = baseOptions.min_p;

            // Claude 专用：top_k
            if (source === chat_completion_sources.CLAUDE) {
@@ -949,6 +952,9 @@ class StreamingGeneration {
            temperature: this.parseOpt(args, 'temperature'),
            presence_penalty: this.parseOpt(args, 'presence_penalty'),
            frequency_penalty: this.parseOpt(args, 'frequency_penalty'),
+            enable_thinking: this.parseOpt(args, 'enable_thinking'),
+            thinking_budget: this.parseOpt(args, 'thinking_budget'),
+            min_p: this.parseOpt(args, 'min_p'),
        };
        let parsedStop;
        try {
Author	SHA1	Message	Date
bielie	8226c48624	chore: update retrieval components	2026-02-08 18:14:02 +08:00
bielie	8fdce7b9a1	fix: qwen thinking toggle and recall log styles	2026-02-08 18:12:55 +08:00