From 59e7301bf8fc9616522da2c98db3bfcc72558105 Mon Sep 17 00:00:00 2001
From: RT15548 <168917470+RT15548@users.noreply.github.com>
Date: Tue, 17 Feb 2026 15:24:39 +0800
Subject: [PATCH] story-summary: confirm modal, key rotation, rate-limit retry, budget caps

---
 modules/story-summary/generate/prompt.js      |  76 +++++---
 modules/story-summary/story-summary-a.css     |   4 +
 modules/story-summary/story-summary-ui.js     |  54 +++++-
 modules/story-summary/story-summary.css       |   4 +
 modules/story-summary/story-summary.html      |  22 +++
 modules/story-summary/story-summary.js        | 147 ++++++++------
 .../story-summary/vector/llm/llm-service.js   |   4 +-
 .../story-summary/vector/llm/siliconflow.js   |  50 ++++-
 .../vector/pipeline/state-integration.js      | 182 +++++++++++++-----
 .../story-summary/vector/retrieval/recall.js  |   4 +-
 10 files changed, 401 insertions(+), 146 deletions(-)

diff --git a/modules/story-summary/generate/prompt.js b/modules/story-summary/generate/prompt.js
index 162d650..8ac6d98 100644
--- a/modules/story-summary/generate/prompt.js
+++ b/modules/story-summary/generate/prompt.js
@@ -49,7 +49,7 @@ const CONSTRAINT_MAX = 2000;
 const ARCS_MAX = 1500;
 const EVENT_BUDGET_MAX = 5000;
 const RELATED_EVENT_MAX = 500;
-const SUMMARIZED_EVIDENCE_MAX = 1500;
+const SUMMARIZED_EVIDENCE_MAX = 2000;
 const UNSUMMARIZED_EVIDENCE_MAX = 2000;
 const TOP_N_STAR = 5;
 
@@ -949,6 +949,8 @@ async function buildVectorPrompt(store, recallResult, causalById, focusCharacter
     const candidates = [...eventHits].sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
     const eventBudget = { used: 0, max: Math.min(EVENT_BUDGET_MAX, total.max - total.used) };
     const relatedBudget = { used: 0, max: RELATED_EVENT_MAX };
+    // Once budget becomes tight, keep high-score L2 summaries and stop attaching evidence.
+    let allowEventEvidence = true;
 
     const selectedDirect = [];
     const selectedRelated = [];
@@ -964,27 +966,39 @@ async function buildVectorPrompt(store, recallResult, causalById, focusCharacter
 
         // 硬规则：RELATED 事件不挂证据（不挂 L0/L1，只保留事件摘要）
         // DIRECT 才允许收集事件内证据组。
-        const evidenceGroups = isDirect
+        const useEvidenceForThisEvent = isDirect && allowEventEvidence;
+        const evidenceGroups = useEvidenceForThisEvent
             ? collectEvidenceGroupsForEvent(e.event, l0Selected, l1ByFloor, usedL0Ids)
             : [];
 
         // 格式化事件（含证据）
         const text = formatEventWithEvidence(e, 0, evidenceGroups, causalById);
         const cost = estimateTokens(text);
+        const fitEventBudget = eventBudget.used + cost <= eventBudget.max;
+        const fitRelatedBudget = isDirect || (relatedBudget.used + cost <= relatedBudget.max);
 
         // 预算检查：整个事件（含证据）作为原子单元
-        if (total.used + cost > total.max) {
+        // 约束：总预算 + 事件预算 + related 子预算（若 applicable）
+        if (total.used + cost > total.max || !fitEventBudget || !fitRelatedBudget) {
             // 尝试不带证据的版本
             const textNoEvidence = formatEventWithEvidence(e, 0, [], causalById);
             const costNoEvidence = estimateTokens(textNoEvidence);
+            const fitEventBudgetNoEvidence = eventBudget.used + costNoEvidence <= eventBudget.max;
+            const fitRelatedBudgetNoEvidence = isDirect || (relatedBudget.used + costNoEvidence <= relatedBudget.max);
 
-            if (total.used + costNoEvidence > total.max) {
+            if (total.used + costNoEvidence > total.max || !fitEventBudgetNoEvidence || !fitRelatedBudgetNoEvidence) {
                 // 归还 usedL0Ids
                 for (const group of evidenceGroups) {
                     for (const l0 of group.l0Atoms) {
                         usedL0Ids.delete(l0.id);
                     }
                 }
+                // Hard cap reached: no-evidence version also cannot fit total/event budget.
+                // Keep ranking semantics (higher-score events first): stop here.
+                if (total.used + costNoEvidence > total.max || !fitEventBudgetNoEvidence) {
+                    break;
+                }
+                // Related sub-budget overflow: skip this related event and continue.
                 continue;
             }
 
@@ -994,6 +1008,10 @@ async function buildVectorPrompt(store, recallResult, causalById, focusCharacter
                     usedL0Ids.delete(l0.id);
                 }
             }
+            // Enter summary-only mode after first budget conflict on evidence.
+            if (useEvidenceForThisEvent && evidenceGroups.length > 0) {
+                allowEventEvidence = false;
+            }
 
             if (isDirect) {
                 selectedDirect.push({
@@ -1112,26 +1130,32 @@ async function buildVectorPrompt(store, recallResult, causalById, focusCharacter
     if (distantL0.length && total.used < total.max) {
         const distantBudget = { used: 0, max: Math.min(SUMMARIZED_EVIDENCE_MAX, total.max - total.used) };
 
-        // 按楼层排序（时间顺序）后分组
-        distantL0.sort((a, b) => a.floor - b.floor);
+        // 先按分数挑组（高分优先），再按时间输出（楼层升序）
         const distantFloorMap = groupL0ByFloor(distantL0);
-
-        // 按楼层顺序遍历（Map 保持插入顺序，distantL0 已按 floor 排序）
+        const distantRanked = [];
         for (const [floor, l0s] of distantFloorMap) {
             const group = buildEvidenceGroup(floor, l0s, l1ByFloor);
+            const bestScore = Math.max(...l0s.map(l0 => (l0.rerankScore ?? l0.similarity ?? 0)));
+            distantRanked.push({ group, bestScore });
+        }
+        distantRanked.sort((a, b) => (b.bestScore - a.bestScore) || (a.group.floor - b.group.floor));
 
-            // 原子组预算检查
+        const acceptedDistantGroups = [];
+        for (const item of distantRanked) {
+            const group = item.group;
             if (distantBudget.used + group.totalTokens > distantBudget.max) continue;
+            distantBudget.used += group.totalTokens;
+            acceptedDistantGroups.push(group);
+            for (const l0 of group.l0Atoms) usedL0Ids.add(l0.id);
+            injectionStats.distantEvidence.units++;
+        }
 
+        acceptedDistantGroups.sort((a, b) => a.floor - b.floor);
+        for (const group of acceptedDistantGroups) {
             const groupLines = formatEvidenceGroup(group);
             for (const line of groupLines) {
                 assembled.distantEvidence.lines.push(line);
             }
-            distantBudget.used += group.totalTokens;
-            for (const l0 of l0s) {
-                usedL0Ids.add(l0.id);
-            }
-            injectionStats.distantEvidence.units++;
         }
 
         assembled.distantEvidence.tokens = distantBudget.used;
@@ -1154,24 +1178,32 @@ async function buildVectorPrompt(store, recallResult, causalById, focusCharacter
         if (recentL0.length) {
             const recentBudget = { used: 0, max: UNSUMMARIZED_EVIDENCE_MAX };
 
-            // 按楼层排序后分组
-            recentL0.sort((a, b) => a.floor - b.floor);
+            // 先按分数挑组（高分优先），再按时间输出（楼层升序）
             const recentFloorMap = groupL0ByFloor(recentL0);
-
+            const recentRanked = [];
             for (const [floor, l0s] of recentFloorMap) {
                 const group = buildEvidenceGroup(floor, l0s, l1ByFloor);
+                const bestScore = Math.max(...l0s.map(l0 => (l0.rerankScore ?? l0.similarity ?? 0)));
+                recentRanked.push({ group, bestScore });
+            }
+            recentRanked.sort((a, b) => (b.bestScore - a.bestScore) || (a.group.floor - b.group.floor));
 
+            const acceptedRecentGroups = [];
+            for (const item of recentRanked) {
+                const group = item.group;
                 if (recentBudget.used + group.totalTokens > recentBudget.max) continue;
+                recentBudget.used += group.totalTokens;
+                acceptedRecentGroups.push(group);
+                for (const l0 of group.l0Atoms) usedL0Ids.add(l0.id);
+                injectionStats.recentEvidence.units++;
+            }
 
+            acceptedRecentGroups.sort((a, b) => a.floor - b.floor);
+            for (const group of acceptedRecentGroups) {
                 const groupLines = formatEvidenceGroup(group);
                 for (const line of groupLines) {
                     assembled.recentEvidence.lines.push(line);
                 }
-                recentBudget.used += group.totalTokens;
-                for (const l0 of l0s) {
-                    usedL0Ids.add(l0.id);
-                }
-                injectionStats.recentEvidence.units++;
             }
 
             assembled.recentEvidence.tokens = recentBudget.used;
diff --git a/modules/story-summary/story-summary-a.css b/modules/story-summary/story-summary-a.css
index 83157dc..8db28eb 100644
--- a/modules/story-summary/story-summary-a.css
+++ b/modules/story-summary/story-summary-a.css
@@ -21,6 +21,10 @@
     padding-right: 4px;
 }
 
+.confirm-modal-box {
+    max-width: 440px; /* width cap for the confirm dialog box */
+}
+
 .fact-group {
     margin-bottom: 12px;
 }
diff --git a/modules/story-summary/story-summary-ui.js b/modules/story-summary/story-summary-ui.js
index 7cff296..6a4d5cb 100644
--- a/modules/story-summary/story-summary-ui.js
+++ b/modules/story-summary/story-summary-ui.js
@@ -358,8 +358,8 @@
             postMsg('ANCHOR_GENERATE');
         };
 
-        $('btn-anchor-clear').onclick = () => {
-            if (confirm('清空所有记忆锚点？（L0 向量也会一并清除）')) {
+        $('btn-anchor-clear').onclick = async () => {
+            if (await showConfirm('清空锚点', '清空所有记忆锚点？（L0 向量也会一并清除）')) {
                 postMsg('ANCHOR_CLEAR');
             }
         };
@@ -375,6 +375,7 @@
         };
 
         $('btn-test-vector-api').onclick = () => {
+            saveConfig(); // 先保存新 Key 到 localStorage
             postMsg('VECTOR_TEST_ONLINE', {
                 provider: 'siliconflow',
                 config: {
@@ -391,8 +392,10 @@
             postMsg('VECTOR_GENERATE', { config: getVectorConfig() });
         };
 
-        $('btn-clear-vectors').onclick = () => {
-            if (confirm('确定清空所有向量数据？')) postMsg('VECTOR_CLEAR');
+        $('btn-clear-vectors').onclick = async () => {
+            if (await showConfirm('清空向量', '确定清空所有向量数据？')) {
+                postMsg('VECTOR_CLEAR');
+            }
         };
 
         $('btn-cancel-vectors').onclick = () => postMsg('VECTOR_CANCEL_GENERATE');
@@ -955,6 +958,43 @@
         postMsg('FULLSCREEN_CLOSED');
     }
 
+    /**
+     * Show the shared confirm modal; resolves true on OK, false on cancel, close button or backdrop click.
+     * @returns {Promise<boolean>}
+     */
+    function showConfirm(title, message, okText = '执行', cancelText = '取消') {
+        return new Promise(resolve => {
+            const modal = $('confirm-modal');
+            const titleEl = $('confirm-title');
+            const msgEl = $('confirm-message');
+            const okBtn = $('confirm-ok');
+            const cancelBtn = $('confirm-cancel');
+            const closeBtn = $('confirm-close');
+            const backdrop = $('confirm-backdrop');
+
+            titleEl.textContent = title; // textContent: caller-supplied text is never parsed as HTML
+            msgEl.textContent = message;
+            okBtn.textContent = okText;
+            cancelBtn.textContent = cancelText;
+
+            const close = (result) => { // hide the modal, detach all four handlers, then settle the promise
+                modal.classList.remove('active');
+                okBtn.onclick = null;
+                cancelBtn.onclick = null;
+                closeBtn.onclick = null;
+                backdrop.onclick = null;
+                resolve(result);
+            };
+
+            okBtn.onclick = () => close(true); // NOTE(review): .onclick assignment — an overlapping showConfirm() call replaces these handlers
+            cancelBtn.onclick = () => close(false);
+            closeBtn.onclick = () => close(false);
+            backdrop.onclick = () => close(false);
+
+            modal.classList.add('active');
+        });
+    }
+
     function renderArcsEditor(arcs) {
         const list = arcs?.length ? arcs : [{ name: '', trajectory: '', progress: 0, moments: [] }];
         const es = $('editor-struct');
@@ -1526,7 +1566,11 @@
         };
 
         // Main actions
-        $('btn-clear').onclick = () => postMsg('REQUEST_CLEAR');
+        $('btn-clear').onclick = async () => {
+            if (await showConfirm('清空数据', '确定要清空本聊天的所有总结、关键词及人物关系数据吗？此操作不可撤销。')) {
+                postMsg('REQUEST_CLEAR');
+            }
+        };
         $('btn-generate').onclick = () => {
             const btn = $('btn-generate');
             if (!localGenerating) {
diff --git a/modules/story-summary/story-summary.css b/modules/story-summary/story-summary.css
index 3e26a78..a7d8b2b 100644
--- a/modules/story-summary/story-summary.css
+++ b/modules/story-summary/story-summary.css
@@ -20,6 +20,10 @@
     padding-right: 4px;
 }
 
+.confirm-modal-box {
+    max-width: 440px; /* width cap for the confirm dialog box */
+}
+
 .fact-group {
     margin-bottom: 12px;
 }
diff --git a/modules/story-summary/story-summary.html b/modules/story-summary/story-summary.html
index bf666ef..4e92395 100644
--- a/modules/story-summary/story-summary.html
+++ b/modules/story-summary/story-summary.html
@@ -833,6 +833,28 @@
 
     <script src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>
     <script src="story-summary-ui.js"></script>
+    <!-- Confirm Modal: the element ids below are looked up by showConfirm() in story-summary-ui.js -->
+    <div class="modal" id="confirm-modal">
+        <div class="modal-bg" id="confirm-backdrop"></div>
+        <div class="modal-box confirm-modal-box">
+            <div class="modal-head">
+                <h2 id="confirm-title">确认操作</h2>
+                <button class="modal-close" id="confirm-close">
+                    <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+                        <line x1="18" y1="6" x2="6" y2="18" />
+                        <line x1="6" y1="6" x2="18" y2="18" />
+                    </svg>
+                </button>
+            </div>
+            <div class="modal-body">
+                <div id="confirm-message" style="margin: 10px 0; line-height: 1.6; color: var(--fg);">内容</div>
+            </div>
+            <div class="modal-foot">
+                <button class="btn" id="confirm-cancel">取消</button>
+                <button class="btn btn-del" id="confirm-ok">执行</button>
+            </div>
+        </div>
+    </div>
 </body>
 
 </html>
diff --git a/modules/story-summary/story-summary.js b/modules/story-summary/story-summary.js
index eca1617..9cb242f 100644
--- a/modules/story-summary/story-summary.js
+++ b/modules/story-summary/story-summary.js
@@ -367,6 +367,18 @@ async function handleAnchorGenerate() {
             postToFrame({ type: "ANCHOR_GEN_PROGRESS", current, total, message });
         });
 
+        // Self-heal: if chunks are empty but boundary looks "already built",
+        // reset boundary so incremental L1 rebuild can start from floor 0.
+        const [meta, storageStats] = await Promise.all([
+            getMeta(chatId),
+            getStorageStats(chatId),
+        ]);
+        const lastFloor = (chat?.length || 0) - 1;
+        if (storageStats.chunks === 0 && lastFloor >= 0 && (meta.lastChunkFloor ?? -1) >= lastFloor) {
+            await updateMeta(chatId, { lastChunkFloor: -1 });
+            xbLog.warn(MODULE_ID, "Detected empty L1 chunks with full boundary, reset lastChunkFloor=-1");
+        }
+
         postToFrame({ type: "ANCHOR_GEN_PROGRESS", current: 0, total: 1, message: "向量化 L1..." });
         const chunkResult = await buildIncrementalChunks({ vectorConfig: vectorCfg });
 
@@ -449,6 +461,34 @@ async function handleGenerateVectors(vectorCfg) {
         await clearStateVectors(chatId);
         await updateMeta(chatId, { lastChunkFloor: -1, fingerprint });
 
+        // Embed `texts`, retrying after a 60 s countdown on any non-abort error; resolves null once cancelled or aborted.
+        const embedWithRetry = async (texts, phase, currentBatchIdx, totalItems) => {
+            while (true) { // NOTE(review): no attempt cap — a failure that never clears keeps retrying until the user cancels
+                if (vectorCancelled) return null;
+                try {
+                    return await embed(texts, vectorCfg, { signal: vectorAbortController.signal });
+                } catch (e) {
+                    if (e?.name === "AbortError" || vectorCancelled) return null;
+                    xbLog.error(MODULE_ID, `${phase} 向量化单次失败`, e);
+
+                    // Wait 60 seconds before retrying; every error is reported to the frame as a rate limit.
+                    const waitSec = 60;
+                    for (let s = waitSec; s > 0; s--) {
+                        if (vectorCancelled) return null; // checked once per second so cancel stays responsive
+                        postToFrame({
+                            type: "VECTOR_GEN_PROGRESS",
+                            phase,
+                            current: currentBatchIdx,
+                            total: totalItems,
+                            message: `触发限流，${s}s 后重试...`
+                        });
+                        await new Promise(r => setTimeout(r, 1000));
+                    }
+                    postToFrame({ type: "VECTOR_GEN_PROGRESS", phase, current: currentBatchIdx, total: totalItems, message: "正在重试..." });
+                }
+            }
+        };
+
         const atoms = getStateAtoms();
         if (!atoms.length) {
             postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L0", current: 0, total: 0, message: "L0 为空，跳过" });
@@ -462,29 +502,26 @@ async function handleGenerateVectors(vectorCfg) {
                 const batch = atoms.slice(i, i + batchSize);
                 const semTexts = batch.map(a => a.semantic);
                 const rTexts = batch.map(a => buildRAggregateText(a));
-                try {
-                    const vectors = await embed(semTexts.concat(rTexts), vectorCfg, { signal: vectorAbortController.signal });
-                    const split = semTexts.length;
-                    if (!Array.isArray(vectors) || vectors.length < split * 2) {
-                        throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
-                    }
-                    const semVectors = vectors.slice(0, split);
-                    const rVectors = vectors.slice(split, split + split);
-                    const items = batch.map((a, j) => ({
-                        atomId: a.atomId,
-                        floor: a.floor,
-                        vector: semVectors[j],
-                        rVector: rVectors[j] || semVectors[j],
-                    }));
-                    await saveStateVectors(chatId, items, fingerprint);
-                    l0Completed += batch.length;
-                    postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L0", current: l0Completed, total: atoms.length });
-                } catch (e) {
-                    if (e?.name === "AbortError") break;
-                    xbLog.error(MODULE_ID, "L0 向量化失败", e);
-                    vectorCancelled = true;
-                    break;
+
+                const vectors = await embedWithRetry(semTexts.concat(rTexts), "L0", l0Completed, atoms.length);
+                if (!vectors) break; // cancelled
+
+                const split = semTexts.length;
+                if (!Array.isArray(vectors) || vectors.length < split * 2) {
+                    xbLog.error(MODULE_ID, `embed长度不匹配: expect>=${split * 2}, got=${vectors?.length || 0}`);
+                    continue;
                 }
+                const semVectors = vectors.slice(0, split);
+                const rVectors = vectors.slice(split, split + split);
+                const items = batch.map((a, j) => ({
+                    atomId: a.atomId,
+                    floor: a.floor,
+                    vector: semVectors[j],
+                    rVector: rVectors[j] || semVectors[j],
+                }));
+                await saveStateVectors(chatId, items, fingerprint);
+                l0Completed += batch.length;
+                postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L0", current: l0Completed, total: atoms.length });
             }
         }
 
@@ -516,22 +553,18 @@ async function handleGenerateVectors(vectorCfg) {
 
                 const batch = allChunks.slice(i, i + batchSize);
                 const texts = batch.map(c => c.text);
-                try {
-                    const vectors = await embed(texts, vectorCfg, { signal: vectorAbortController.signal });
-                    const items = batch.map((c, j) => ({
-                        chunkId: c.chunkId,
-                        vector: vectors[j],
-                    }));
-                    await saveChunkVectors(chatId, items, fingerprint);
-                    l1Vectors = l1Vectors.concat(items);
-                    l1Completed += batch.length;
-                    postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L1", current: l1Completed, total: allChunks.length });
-                } catch (e) {
-                    if (e?.name === "AbortError") break;
-                    xbLog.error(MODULE_ID, "L1 向量化失败", e);
-                    vectorCancelled = true;
-                    break;
-                }
+
+                const vectors = await embedWithRetry(texts, "L1", l1Completed, allChunks.length);
+                if (!vectors) break; // cancelled
+
+                const items = batch.map((c, j) => ({
+                    chunkId: c.chunkId,
+                    vector: vectors[j],
+                }));
+                await saveChunkVectors(chatId, items, fingerprint);
+                l1Vectors = l1Vectors.concat(items);
+                l1Completed += batch.length;
+                postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L1", current: l1Completed, total: allChunks.length });
             }
         }
 
@@ -555,21 +588,17 @@ async function handleGenerateVectors(vectorCfg) {
 
                 const batch = l2Pairs.slice(i, i + batchSize);
                 const texts = batch.map(p => p.text);
-                try {
-                    const vectors = await embed(texts, vectorCfg, { signal: vectorAbortController.signal });
-                    const items = batch.map((p, idx) => ({
-                        eventId: p.id,
-                        vector: vectors[idx],
-                    }));
-                    await saveEventVectorsToDb(chatId, items, fingerprint);
-                    l2Completed += batch.length;
-                    postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L2", current: l2Completed, total: l2Pairs.length });
-                } catch (e) {
-                    if (e?.name === "AbortError") break;
-                    xbLog.error(MODULE_ID, "L2 向量化失败", e);
-                    vectorCancelled = true;
-                    break;
-                }
+
+                const vectors = await embedWithRetry(texts, "L2", l2Completed, l2Pairs.length);
+                if (!vectors) break; // cancelled
+
+                const items = batch.map((p, idx) => ({
+                    eventId: p.id,
+                    vector: vectors[idx],
+                }));
+                await saveEventVectorsToDb(chatId, items, fingerprint);
+                l2Completed += batch.length;
+                postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L2", current: l2Completed, total: l2Pairs.length });
             }
         }
 
@@ -598,7 +627,9 @@ async function handleClearVectors() {
     await clearEventVectors(chatId);
     await clearAllChunks(chatId);
     await clearStateVectors(chatId);
-    await updateMeta(chatId, { lastChunkFloor: -1 });
+    // Reset both boundary and fingerprint so next incremental build starts from floor 0
+    // without being blocked by stale engine fingerprint mismatch.
+    await updateMeta(chatId, { lastChunkFloor: -1, fingerprint: null });
     await sendVectorStatsToFrame();
     await executeSlashCommand('/echo severity=info 向量数据已清除。如需恢复召回功能，请重新点击"生成向量"。');
     xbLog.info(MODULE_ID, "向量数据已清除");
@@ -1138,7 +1169,7 @@ function updateFrameStatsAfterSummary(endMesId, merged) {
 // iframe 消息处理
 // ═══════════════════════════════════════════════════════════════════════════
 
-function handleFrameMessage(event) {
+async function handleFrameMessage(event) {
     const iframe = document.getElementById("xiaobaix-story-summary-iframe");
     if (!isTrustedMessage(event, iframe, "LittleWhiteBox-StoryFrame")) return;
 
@@ -1193,7 +1224,7 @@ function handleFrameMessage(event) {
             break;
 
         case "VECTOR_CLEAR":
-            handleClearVectors();
+            await handleClearVectors();
             break;
 
         case "VECTOR_CANCEL_GENERATE":
@@ -1204,11 +1235,11 @@ function handleFrameMessage(event) {
             break;
 
         case "ANCHOR_GENERATE":
-            handleAnchorGenerate();
+            await handleAnchorGenerate();
             break;
 
         case "ANCHOR_CLEAR":
-            handleAnchorClear();
+            await handleAnchorClear();
             break;
 
         case "ANCHOR_CANCEL":
diff --git a/modules/story-summary/vector/llm/llm-service.js b/modules/story-summary/vector/llm/llm-service.js
index 537f9eb..13ec391 100644
--- a/modules/story-summary/vector/llm/llm-service.js
+++ b/modules/story-summary/vector/llm/llm-service.js
@@ -3,6 +3,7 @@
 // ═══════════════════════════════════════════════════════════════════════════
 import { xbLog } from '../../../../core/debug-core.js';
 import { getVectorConfig } from '../../data/config.js';
+import { getApiKey } from './siliconflow.js';
 
 const MODULE_ID = 'vector-llm-service';
 const SILICONFLOW_API_URL = 'https://api.siliconflow.cn/v1';
@@ -40,8 +41,7 @@ export async function callLLM(messages, options = {}) {
     const mod = getStreamingModule();
     if (!mod) throw new Error('Streaming module not ready');
 
-    const cfg = getVectorConfig();
-    const apiKey = cfg?.online?.key || '';
+    const apiKey = getApiKey() || '';
     if (!apiKey) {
         throw new Error('L0 requires siliconflow API key');
     }
diff --git a/modules/story-summary/vector/llm/siliconflow.js b/modules/story-summary/vector/llm/siliconflow.js
index bebdc74..1a7bb7d 100644
--- a/modules/story-summary/vector/llm/siliconflow.js
+++ b/modules/story-summary/vector/llm/siliconflow.js
@@ -1,21 +1,63 @@
 // ═══════════════════════════════════════════════════════════════════════════
-// siliconflow.js - 仅保留 Embedding
+// siliconflow.js - Embedding + 多 Key 轮询
+//
+// 在 API Key 输入框中用逗号、分号、竖线或换行分隔多个 Key，例如：
+//   sk-aaa,sk-bbb,sk-ccc
+// 每次调用自动轮询到下一个 Key，并发请求会均匀分布到所有 Key 上。
 // ═══════════════════════════════════════════════════════════════════════════
 
 const BASE_URL = 'https://api.siliconflow.cn';
 const EMBEDDING_MODEL = 'BAAI/bge-m3';
 
-export function getApiKey() {
+// ★ 多 Key 轮询状态
+let _keyIndex = 0;
+
+/**
+ * Parse every API key stored in localStorage (separated by commas, semicolons, pipes or newlines).
+ */
+function parseKeys() {
     try {
         const raw = localStorage.getItem('summary_panel_config');
         if (raw) {
             const parsed = JSON.parse(raw);
-            return parsed.vector?.online?.key || null;
+            const keyStr = parsed.vector?.online?.key || '';
+            return keyStr
+                .split(/[,;|\n]+/)
+                .map(k => k.trim())
+                .filter(k => k.length > 0);
         }
     } catch { }
-    return null;
+    return []; // reached when nothing is stored or anything in the try block threw
 }
 
+/**
+ * Return the next configured API key in round-robin order, or null when none is configured.
+ * With a single key that key is always returned and the rotation cursor is left untouched.
+ */
+export function getApiKey() {
+    const keys = parseKeys();
+    if (!keys.length) return null;
+    if (keys.length === 1) return keys[0];
+
+    const idx = _keyIndex % keys.length;
+    const key = keys[idx];
+    _keyIndex = (_keyIndex + 1) % keys.length;
+    // Log the slot number only: key material, even partially masked, must not reach the console.
+    console.log(`[SiliconFlow] 使用 Key ${idx + 1}/${keys.length}`);
+    return key;
+}
+
+/**
+ * Number of configured keys, never less than 1 (meant for callers that scale their concurrency by it).
+ */
+export function getKeyCount() {
+    return Math.max(1, parseKeys().length);
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Embedding
+// ═══════════════════════════════════════════════════════════════════════════
+
 export async function embed(texts, options = {}) {
     if (!texts?.length) return [];
 
diff --git a/modules/story-summary/vector/pipeline/state-integration.js b/modules/story-summary/vector/pipeline/state-integration.js
index 2838301..bd0516b 100644
--- a/modules/story-summary/vector/pipeline/state-integration.js
+++ b/modules/story-summary/vector/pipeline/state-integration.js
@@ -181,14 +181,83 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
     // ★ Phase 1: 收集所有新提取的 atoms（不向量化）
     const allNewAtoms = [];
 
-    // ★ 30 并发批次处理
-        // 并发池处理（保持固定并发度）
+    // ★ Rate-limit detection: after N consecutive failures, pause and slow down
+    let consecutiveFailures = 0;
+    let rateLimited = false;
+    const RATE_LIMIT_THRESHOLD = 3;       // consecutive failures that trigger rate-limit protection
+    const RATE_LIMIT_WAIT_MS = 60000;      // wait time once rate limited (60 seconds)
+    const RETRY_INTERVAL_MS = 1000;        // gap between requests in slow mode (1 second)
+    const RETRY_CONCURRENCY = 1;           // ★ concurrency in slow mode (default 1; staying at or below 5 is advised)
+
+    // ★ Shared logic for processing one pair (reused by normal mode and slow mode)
+    const processPair = async (pair, idx, workerId) => {
+        const floor = pair.aiFloor;
+        const prev = getL0FloorStatus(floor); // previous status, read for its attempt count on failure
+
+        active++;
+        if (active > peakActive) peakActive = active;
+        if (DEBUG_CONCURRENCY && (idx % 10 === 0)) {
+            xbLog.info(MODULE_ID, `L0 pool start idx=${idx} active=${active} peak=${peakActive} worker=${workerId}`);
+        }
+
+        try {
+            const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 });
+
+            if (extractionCancelled) return; // the finally block below still runs
+
+            if (atoms == null) { // a null/undefined result is routed to the catch block as a failure
+                throw new Error('llm_failed');
+            }
+
+            // ★ Success: reset the consecutive-failure counter
+            consecutiveFailures = 0;
+
+            if (!atoms.length) {
+                setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
+            } else {
+                atoms.forEach(a => a.chatId = chatId);
+                saveStateAtoms(atoms);
+                allNewAtoms.push(...atoms); // collected only; nothing is vectorized in this function body
+
+                setL0FloorStatus(floor, { status: 'ok', atoms: atoms.length });
+                builtAtoms += atoms.length;
+            }
+        } catch (e) {
+            if (extractionCancelled) return;
+
+            setL0FloorStatus(floor, {
+                status: 'fail',
+                attempts: (prev?.attempts || 0) + 1,
+                reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120), // whitespace collapsed, capped at 120 chars
+            });
+            failed++;
+
+            // ★ Rate-limit detection: count consecutive failures
+            consecutiveFailures++;
+            if (consecutiveFailures >= RATE_LIMIT_THRESHOLD && !rateLimited) {
+                rateLimited = true; // set once; the worker loop checks this flag before taking more work
+                xbLog.warn(MODULE_ID, `连续失败 ${consecutiveFailures} 次，疑似触发 API 限流，将暂停所有并发`);
+            }
+        } finally {
+            active--;
+            if (!extractionCancelled) { // progress is not reported once extraction has been cancelled
+                completed++;
+                onProgress?.(`提取: ${completed}/${total}`, completed, total);
+            }
+            if (DEBUG_CONCURRENCY && (completed % 25 === 0 || completed === total)) {
+                const elapsed = Math.max(1, Math.round(performance.now() - tStart));
+                xbLog.info(MODULE_ID, `L0 pool progress=${completed}/${total} active=${active} peak=${peakActive} elapsedMs=${elapsed}`);
+            }
+        }
+    };
+
+    // ★ Concurrency pool (keeps a fixed degree of concurrency)
     const poolSize = Math.min(CONCURRENCY, pendingPairs.length);
     let nextIndex = 0;
     let started = 0;
     const runWorker = async (workerId) => {
         while (true) {
-            if (extractionCancelled) return;
+            if (extractionCancelled || rateLimited) return; // stop claiming new pairs once cancelled or once the rate-limit flag is set
             const idx = nextIndex++;
             if (idx >= pendingPairs.length) return;
 
@@ -198,57 +267,9 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
                 await new Promise(r => setTimeout(r, stagger * STAGGER_DELAY));
             }
 
-            if (extractionCancelled) return;
+            if (extractionCancelled || rateLimited) return; // re-check after the stagger delay; a pair claimed here is left unprocessed by this worker
 
-            const floor = pair.aiFloor;
-            const prev = getL0FloorStatus(floor);
-
-            active++;
-            if (active > peakActive) peakActive = active;
-            if (DEBUG_CONCURRENCY && (idx % 10 === 0)) {
-                xbLog.info(MODULE_ID, `L0 pool start idx=${idx} active=${active} peak=${peakActive} worker=${workerId}`);
-            }
-
-            try {
-                const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 });
-
-                if (extractionCancelled) return;
-
-                if (atoms == null) {
-                    throw new Error('llm_failed');
-                }
-
-                if (!atoms.length) {
-                    setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
-                } else {
-                    atoms.forEach(a => a.chatId = chatId);
-                    saveStateAtoms(atoms);
-                    // Phase 1: 只收集，不向量化
-                    allNewAtoms.push(...atoms);
-
-                    setL0FloorStatus(floor, { status: 'ok', atoms: atoms.length });
-                    builtAtoms += atoms.length;
-                }
-            } catch (e) {
-                if (extractionCancelled) return;
-
-                setL0FloorStatus(floor, {
-                    status: 'fail',
-                    attempts: (prev?.attempts || 0) + 1,
-                    reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120),
-                });
-                failed++;
-            } finally {
-                active--;
-                if (!extractionCancelled) {
-                    completed++;
-                    onProgress?.(`提取: ${completed}/${total}`, completed, total);
-                }
-                if (DEBUG_CONCURRENCY && (completed % 25 === 0 || completed === total)) {
-                    const elapsed = Math.max(1, Math.round(performance.now() - tStart));
-                    xbLog.info(MODULE_ID, `L0 pool progress=${completed}/${total} active=${active} peak=${peakActive} elapsedMs=${elapsed}`);
-                }
-            }
+            await processPair(pair, idx, workerId); // per-pair extraction and floor-status bookkeeping now live in the shared processPair helper
         }
     };
 
@@ -258,6 +279,61 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
         xbLog.info(MODULE_ID, `L0 pool done completed=${completed}/${total} failed=${failed} peakActive=${peakActive} elapsedMs=${elapsed}`);
     }
 
+    // ═════════════════════════════════════════════════════════════════════
+    // ★ Rate-limit recovery: reset progress and rerun from the start in throttled mode
+    // ═════════════════════════════════════════════════════════════════════
+    if (rateLimited && !extractionCancelled) {
+        const waitSec = RATE_LIMIT_WAIT_MS / 1000;
+        xbLog.info(MODULE_ID, `限流保护：将重置进度并从头开始降速重来（并发=${RETRY_CONCURRENCY}, 间隔=${RETRY_INTERVAL_MS}ms）`);
+        onProgress?.(`疑似限流，${waitSec}s 后降速重头开始...`, completed, total);
+
+        await new Promise(r => setTimeout(r, RATE_LIMIT_WAIT_MS)); // cool-down before the throttled pass; cancellation is only re-checked afterwards
+
+        if (!extractionCancelled) {
+            // ★ Core logic: reset the counters so the UI restarts from 0, giving the user "starting over" feedback
+            rateLimited = false;
+            consecutiveFailures = 0;
+            completed = 0;
+            failed = 0;
+
+            let retryNextIdx = 0;
+
+            xbLog.info(MODULE_ID, `限流恢复：开始降速模式扫描 ${pendingPairs.length} 个楼层`);
+
+            const retryWorkers = Math.min(RETRY_CONCURRENCY, pendingPairs.length);
+            const runRetryWorker = async (wid) => {
+                while (true) {
+                    if (extractionCancelled) return; // NOTE(review): unlike the main pool loop, rateLimited is not checked here, so this pass keeps going even if processPair sets it again
+                    const idx = retryNextIdx++;
+                    if (idx >= pendingPairs.length) return;
+
+                    const pair = pendingPairs[idx];
+                    const floor = pair.aiFloor;
+
+                    // ★ Look up the status recorded for this floor
+                    const st = getL0FloorStatus(floor);
+                    if (st?.status === 'ok' || st?.status === 'empty') {
+                        // Already recorded as ok/empty: skip it and only advance the progress counter
+                        completed++;
+                        onProgress?.(`提取: ${completed}/${total} (跳过已完成)`, completed, total);
+                        continue;
+                    }
+
+                    // ★ Anything not yet completed is processed by processPair under the reduced retry concurrency
+                    await processPair(pair, idx, `retry-${wid}`);
+
+                    // Pause after each request (except after the last index) to avoid tripping the rate limit again
+                    if (idx < pendingPairs.length - 1 && RETRY_INTERVAL_MS > 0) {
+                        await new Promise(r => setTimeout(r, RETRY_INTERVAL_MS));
+                    }
+                }
+            };
+
+            await Promise.all(Array.from({ length: retryWorkers }, (_, i) => runRetryWorker(i)));
+            xbLog.info(MODULE_ID, `降速重头开始阶段结束`);
+        }
+    }
+
     try {
         saveMetadataDebounced?.();
     } catch { }
diff --git a/modules/story-summary/vector/retrieval/recall.js b/modules/story-summary/vector/retrieval/recall.js
index 3484c04..b049e32 100644
--- a/modules/story-summary/vector/retrieval/recall.js
+++ b/modules/story-summary/vector/retrieval/recall.js
@@ -60,7 +60,7 @@ const CONFIG = {
     // Event (L2 Events)
     EVENT_CANDIDATE_MAX: 100,
     EVENT_SELECT_MAX: 50,
-    EVENT_MIN_SIMILARITY: 0.55,
+    EVENT_MIN_SIMILARITY: 0.60,
     EVENT_MMR_LAMBDA: 0.72,
     EVENT_ENTITY_BYPASS_SIM: 0.70,
 
@@ -79,7 +79,7 @@ const CONFIG = {
 
     // Rerank（floor-level）
     RERANK_TOP_N: 20,
-    RERANK_MIN_SCORE: 0.15,
+    RERANK_MIN_SCORE: 0.10,
 
     // 因果链
     CAUSAL_CHAIN_MAX_DEPTH: 10,