From 59e7301bf8fc9616522da2c98db3bfcc72558105 Mon Sep 17 00:00:00 2001 From: RT15548 <168917470+RT15548@users.noreply.github.com> Date: Tue, 17 Feb 2026 15:24:39 +0800 Subject: [PATCH] Add files via upload --- modules/story-summary/generate/prompt.js | 76 +++++--- modules/story-summary/story-summary-a.css | 4 + modules/story-summary/story-summary-ui.js | 54 +++++- modules/story-summary/story-summary.css | 4 + modules/story-summary/story-summary.html | 22 +++ modules/story-summary/story-summary.js | 147 ++++++++------ .../story-summary/vector/llm/llm-service.js | 4 +- .../story-summary/vector/llm/siliconflow.js | 50 ++++- .../vector/pipeline/state-integration.js | 182 +++++++++++++----- .../story-summary/vector/retrieval/recall.js | 4 +- 10 files changed, 401 insertions(+), 146 deletions(-) diff --git a/modules/story-summary/generate/prompt.js b/modules/story-summary/generate/prompt.js index 162d650..8ac6d98 100644 --- a/modules/story-summary/generate/prompt.js +++ b/modules/story-summary/generate/prompt.js @@ -49,7 +49,7 @@ const CONSTRAINT_MAX = 2000; const ARCS_MAX = 1500; const EVENT_BUDGET_MAX = 5000; const RELATED_EVENT_MAX = 500; -const SUMMARIZED_EVIDENCE_MAX = 1500; +const SUMMARIZED_EVIDENCE_MAX = 2000; const UNSUMMARIZED_EVIDENCE_MAX = 2000; const TOP_N_STAR = 5; @@ -949,6 +949,8 @@ async function buildVectorPrompt(store, recallResult, causalById, focusCharacter const candidates = [...eventHits].sort((a, b) => (b.similarity || 0) - (a.similarity || 0)); const eventBudget = { used: 0, max: Math.min(EVENT_BUDGET_MAX, total.max - total.used) }; const relatedBudget = { used: 0, max: RELATED_EVENT_MAX }; + // Once budget becomes tight, keep high-score L2 summaries and stop attaching evidence. + let allowEventEvidence = true; const selectedDirect = []; const selectedRelated = []; @@ -964,27 +966,39 @@ async function buildVectorPrompt(store, recallResult, causalById, focusCharacter // 硬规则:RELATED 事件不挂证据(不挂 L0/L1,只保留事件摘要) // DIRECT 才允许收集事件内证据组。 - const evidenceGroups = isDirect + const useEvidenceForThisEvent = isDirect && allowEventEvidence; + const evidenceGroups = useEvidenceForThisEvent ? collectEvidenceGroupsForEvent(e.event, l0Selected, l1ByFloor, usedL0Ids) : []; // 格式化事件(含证据) const text = formatEventWithEvidence(e, 0, evidenceGroups, causalById); const cost = estimateTokens(text); + const fitEventBudget = eventBudget.used + cost <= eventBudget.max; + const fitRelatedBudget = isDirect || (relatedBudget.used + cost <= relatedBudget.max); // 预算检查:整个事件(含证据)作为原子单元 - if (total.used + cost > total.max) { + // 约束:总预算 + 事件预算 + related 子预算(若 applicable) + if (total.used + cost > total.max || !fitEventBudget || !fitRelatedBudget) { // 尝试不带证据的版本 const textNoEvidence = formatEventWithEvidence(e, 0, [], causalById); const costNoEvidence = estimateTokens(textNoEvidence); + const fitEventBudgetNoEvidence = eventBudget.used + costNoEvidence <= eventBudget.max; + const fitRelatedBudgetNoEvidence = isDirect || (relatedBudget.used + costNoEvidence <= relatedBudget.max); - if (total.used + costNoEvidence > total.max) { + if (total.used + costNoEvidence > total.max || !fitEventBudgetNoEvidence || !fitRelatedBudgetNoEvidence) { // 归还 usedL0Ids for (const group of evidenceGroups) { for (const l0 of group.l0Atoms) { usedL0Ids.delete(l0.id); } } + // Hard cap reached: no-evidence version also cannot fit total/event budget. + // Keep ranking semantics (higher-score events first): stop here. + if (total.used + costNoEvidence > total.max || !fitEventBudgetNoEvidence) { + break; + } + // Related sub-budget overflow: skip this related event and continue. continue; } @@ -994,6 +1008,10 @@ async function buildVectorPrompt(store, recallResult, causalById, focusCharacter usedL0Ids.delete(l0.id); } } + // Enter summary-only mode after first budget conflict on evidence. + if (useEvidenceForThisEvent && evidenceGroups.length > 0) { + allowEventEvidence = false; + } if (isDirect) { selectedDirect.push({ @@ -1112,26 +1130,32 @@ async function buildVectorPrompt(store, recallResult, causalById, focusCharacter if (distantL0.length && total.used < total.max) { const distantBudget = { used: 0, max: Math.min(SUMMARIZED_EVIDENCE_MAX, total.max - total.used) }; - // 按楼层排序(时间顺序)后分组 - distantL0.sort((a, b) => a.floor - b.floor); + // 先按分数挑组(高分优先),再按时间输出(楼层升序) const distantFloorMap = groupL0ByFloor(distantL0); - - // 按楼层顺序遍历(Map 保持插入顺序,distantL0 已按 floor 排序) + const distantRanked = []; for (const [floor, l0s] of distantFloorMap) { const group = buildEvidenceGroup(floor, l0s, l1ByFloor); + const bestScore = Math.max(...l0s.map(l0 => (l0.rerankScore ?? l0.similarity ?? 0))); + distantRanked.push({ group, bestScore }); + } + distantRanked.sort((a, b) => (b.bestScore - a.bestScore) || (a.group.floor - b.group.floor)); - // 原子组预算检查 + const acceptedDistantGroups = []; + for (const item of distantRanked) { + const group = item.group; if (distantBudget.used + group.totalTokens > distantBudget.max) continue; + distantBudget.used += group.totalTokens; + acceptedDistantGroups.push(group); + for (const l0 of group.l0Atoms) usedL0Ids.add(l0.id); + injectionStats.distantEvidence.units++; + } + acceptedDistantGroups.sort((a, b) => a.floor - b.floor); + for (const group of acceptedDistantGroups) { const groupLines = formatEvidenceGroup(group); for (const line of groupLines) { assembled.distantEvidence.lines.push(line); } - distantBudget.used += group.totalTokens; - for (const l0 of l0s) { - usedL0Ids.add(l0.id); - } - injectionStats.distantEvidence.units++; } assembled.distantEvidence.tokens = distantBudget.used; @@ -1154,24 +1178,32 @@ async function buildVectorPrompt(store, recallResult, causalById, focusCharacter if (recentL0.length) { const recentBudget = { used: 0, max: UNSUMMARIZED_EVIDENCE_MAX }; - // 按楼层排序后分组 - recentL0.sort((a, b) => a.floor - b.floor); + // 先按分数挑组(高分优先),再按时间输出(楼层升序) const recentFloorMap = groupL0ByFloor(recentL0); - + const recentRanked = []; for (const [floor, l0s] of recentFloorMap) { const group = buildEvidenceGroup(floor, l0s, l1ByFloor); + const bestScore = Math.max(...l0s.map(l0 => (l0.rerankScore ?? l0.similarity ?? 0))); + recentRanked.push({ group, bestScore }); + } + recentRanked.sort((a, b) => (b.bestScore - a.bestScore) || (a.group.floor - b.group.floor)); + const acceptedRecentGroups = []; + for (const item of recentRanked) { + const group = item.group; if (recentBudget.used + group.totalTokens > recentBudget.max) continue; + recentBudget.used += group.totalTokens; + acceptedRecentGroups.push(group); + for (const l0 of group.l0Atoms) usedL0Ids.add(l0.id); + injectionStats.recentEvidence.units++; + } + acceptedRecentGroups.sort((a, b) => a.floor - b.floor); + for (const group of acceptedRecentGroups) { const groupLines = formatEvidenceGroup(group); for (const line of groupLines) { assembled.recentEvidence.lines.push(line); } - recentBudget.used += group.totalTokens; - for (const l0 of l0s) { - usedL0Ids.add(l0.id); - } - injectionStats.recentEvidence.units++; } assembled.recentEvidence.tokens = recentBudget.used; diff --git a/modules/story-summary/story-summary-a.css b/modules/story-summary/story-summary-a.css index 83157dc..8db28eb 100644 --- a/modules/story-summary/story-summary-a.css +++ b/modules/story-summary/story-summary-a.css @@ -21,6 +21,10 @@ padding-right: 4px; } +.confirm-modal-box { + max-width: 440px; +} + .fact-group { margin-bottom: 12px; } diff --git a/modules/story-summary/story-summary-ui.js b/modules/story-summary/story-summary-ui.js index 7cff296..6a4d5cb 100644 --- a/modules/story-summary/story-summary-ui.js +++ b/modules/story-summary/story-summary-ui.js @@ -358,8 +358,8 @@ postMsg('ANCHOR_GENERATE'); }; - $('btn-anchor-clear').onclick = () => { - if (confirm('清空所有记忆锚点?(L0 向量也会一并清除)')) { + $('btn-anchor-clear').onclick = async () => { + if (await showConfirm('清空锚点', '清空所有记忆锚点?(L0 向量也会一并清除)')) { postMsg('ANCHOR_CLEAR'); } }; @@ -375,6 +375,7 @@ }; $('btn-test-vector-api').onclick = () => { + saveConfig(); // 先保存新 Key 到 localStorage postMsg('VECTOR_TEST_ONLINE', { provider: 'siliconflow', config: { @@ -391,8 +392,10 @@ postMsg('VECTOR_GENERATE', { config: getVectorConfig() }); }; - $('btn-clear-vectors').onclick = () => { - if (confirm('确定清空所有向量数据?')) postMsg('VECTOR_CLEAR'); + $('btn-clear-vectors').onclick = async () => { + if (await showConfirm('清空向量', '确定清空所有向量数据?')) { + postMsg('VECTOR_CLEAR'); + } }; $('btn-cancel-vectors').onclick = () => postMsg('VECTOR_CANCEL_GENERATE'); @@ -955,6 +958,43 @@ postMsg('FULLSCREEN_CLOSED'); } + /** + * 显示通用确认弹窗 + * @returns {Promise} + */ + function showConfirm(title, message, okText = '执行', cancelText = '取消') { + return new Promise(resolve => { + const modal = $('confirm-modal'); + const titleEl = $('confirm-title'); + const msgEl = $('confirm-message'); + const okBtn = $('confirm-ok'); + const cancelBtn = $('confirm-cancel'); + const closeBtn = $('confirm-close'); + const backdrop = $('confirm-backdrop'); + + titleEl.textContent = title; + msgEl.textContent = message; + okBtn.textContent = okText; + cancelBtn.textContent = cancelText; + + const close = (result) => { + modal.classList.remove('active'); + okBtn.onclick = null; + cancelBtn.onclick = null; + closeBtn.onclick = null; + backdrop.onclick = null; + resolve(result); + }; + + okBtn.onclick = () => close(true); + cancelBtn.onclick = () => close(false); + closeBtn.onclick = () => close(false); + backdrop.onclick = () => close(false); + + modal.classList.add('active'); + }); + } + function renderArcsEditor(arcs) { const list = arcs?.length ? arcs : [{ name: '', trajectory: '', progress: 0, moments: [] }]; const es = $('editor-struct'); @@ -1526,7 +1566,11 @@ }; // Main actions - $('btn-clear').onclick = () => postMsg('REQUEST_CLEAR'); + $('btn-clear').onclick = async () => { + if (await showConfirm('清空数据', '确定要清空本聊天的所有总结、关键词及人物关系数据吗?此操作不可撤销。')) { + postMsg('REQUEST_CLEAR'); + } + }; $('btn-generate').onclick = () => { const btn = $('btn-generate'); if (!localGenerating) { diff --git a/modules/story-summary/story-summary.css b/modules/story-summary/story-summary.css index 3e26a78..a7d8b2b 100644 --- a/modules/story-summary/story-summary.css +++ b/modules/story-summary/story-summary.css @@ -20,6 +20,10 @@ padding-right: 4px; } +.confirm-modal-box { + max-width: 440px; +} + .fact-group { margin-bottom: 12px; } diff --git a/modules/story-summary/story-summary.html b/modules/story-summary/story-summary.html index bf666ef..4e92395 100644 --- a/modules/story-summary/story-summary.html +++ b/modules/story-summary/story-summary.html @@ -833,6 +833,28 @@ + + diff --git a/modules/story-summary/story-summary.js b/modules/story-summary/story-summary.js index eca1617..9cb242f 100644 --- a/modules/story-summary/story-summary.js +++ b/modules/story-summary/story-summary.js @@ -367,6 +367,18 @@ async function handleAnchorGenerate() { postToFrame({ type: "ANCHOR_GEN_PROGRESS", current, total, message }); }); + // Self-heal: if chunks are empty but boundary looks "already built", + // reset boundary so incremental L1 rebuild can start from floor 0. + const [meta, storageStats] = await Promise.all([ + getMeta(chatId), + getStorageStats(chatId), + ]); + const lastFloor = (chat?.length || 0) - 1; + if (storageStats.chunks === 0 && lastFloor >= 0 && (meta.lastChunkFloor ?? -1) >= lastFloor) { + await updateMeta(chatId, { lastChunkFloor: -1 }); + xbLog.warn(MODULE_ID, "Detected empty L1 chunks with full boundary, reset lastChunkFloor=-1"); + } + postToFrame({ type: "ANCHOR_GEN_PROGRESS", current: 0, total: 1, message: "向量化 L1..." }); const chunkResult = await buildIncrementalChunks({ vectorConfig: vectorCfg }); @@ -449,6 +461,34 @@ async function handleGenerateVectors(vectorCfg) { await clearStateVectors(chatId); await updateMeta(chatId, { lastChunkFloor: -1, fingerprint }); + // Helper to embed with retry + const embedWithRetry = async (texts, phase, currentBatchIdx, totalItems) => { + while (true) { + if (vectorCancelled) return null; + try { + return await embed(texts, vectorCfg, { signal: vectorAbortController.signal }); + } catch (e) { + if (e?.name === "AbortError" || vectorCancelled) return null; + xbLog.error(MODULE_ID, `${phase} 向量化单次失败`, e); + + // 等待 60 秒重试 + const waitSec = 60; + for (let s = waitSec; s > 0; s--) { + if (vectorCancelled) return null; + postToFrame({ + type: "VECTOR_GEN_PROGRESS", + phase, + current: currentBatchIdx, + total: totalItems, + message: `触发限流,${s}s 后重试...` + }); + await new Promise(r => setTimeout(r, 1000)); + } + postToFrame({ type: "VECTOR_GEN_PROGRESS", phase, current: currentBatchIdx, total: totalItems, message: "正在重试..." }); + } + } + }; + const atoms = getStateAtoms(); if (!atoms.length) { postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L0", current: 0, total: 0, message: "L0 为空,跳过" }); @@ -462,29 +502,26 @@ async function handleGenerateVectors(vectorCfg) { const batch = atoms.slice(i, i + batchSize); const semTexts = batch.map(a => a.semantic); const rTexts = batch.map(a => buildRAggregateText(a)); - try { - const vectors = await embed(semTexts.concat(rTexts), vectorCfg, { signal: vectorAbortController.signal }); - const split = semTexts.length; - if (!Array.isArray(vectors) || vectors.length < split * 2) { - throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`); - } - const semVectors = vectors.slice(0, split); - const rVectors = vectors.slice(split, split + split); - const items = batch.map((a, j) => ({ - atomId: a.atomId, - floor: a.floor, - vector: semVectors[j], - rVector: rVectors[j] || semVectors[j], - })); - await saveStateVectors(chatId, items, fingerprint); - l0Completed += batch.length; - postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L0", current: l0Completed, total: atoms.length }); - } catch (e) { - if (e?.name === "AbortError") break; - xbLog.error(MODULE_ID, "L0 向量化失败", e); - vectorCancelled = true; - break; + + const vectors = await embedWithRetry(semTexts.concat(rTexts), "L0", l0Completed, atoms.length); + if (!vectors) break; // cancelled + + const split = semTexts.length; + if (!Array.isArray(vectors) || vectors.length < split * 2) { + xbLog.error(MODULE_ID, `embed长度不匹配: expect>=${split * 2}, got=${vectors?.length || 0}`); + continue; } + const semVectors = vectors.slice(0, split); + const rVectors = vectors.slice(split, split + split); + const items = batch.map((a, j) => ({ + atomId: a.atomId, + floor: a.floor, + vector: semVectors[j], + rVector: rVectors[j] || semVectors[j], + })); + await saveStateVectors(chatId, items, fingerprint); + l0Completed += batch.length; + postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L0", current: l0Completed, total: atoms.length }); } } @@ -516,22 +553,18 @@ async function handleGenerateVectors(vectorCfg) { const batch = allChunks.slice(i, i + batchSize); const texts = batch.map(c => c.text); - try { - const vectors = await embed(texts, vectorCfg, { signal: vectorAbortController.signal }); - const items = batch.map((c, j) => ({ - chunkId: c.chunkId, - vector: vectors[j], - })); - await saveChunkVectors(chatId, items, fingerprint); - l1Vectors = l1Vectors.concat(items); - l1Completed += batch.length; - postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L1", current: l1Completed, total: allChunks.length }); - } catch (e) { - if (e?.name === "AbortError") break; - xbLog.error(MODULE_ID, "L1 向量化失败", e); - vectorCancelled = true; - break; - } + + const vectors = await embedWithRetry(texts, "L1", l1Completed, allChunks.length); + if (!vectors) break; // cancelled + + const items = batch.map((c, j) => ({ + chunkId: c.chunkId, + vector: vectors[j], + })); + await saveChunkVectors(chatId, items, fingerprint); + l1Vectors = l1Vectors.concat(items); + l1Completed += batch.length; + postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L1", current: l1Completed, total: allChunks.length }); } } @@ -555,21 +588,17 @@ async function handleGenerateVectors(vectorCfg) { const batch = l2Pairs.slice(i, i + batchSize); const texts = batch.map(p => p.text); - try { - const vectors = await embed(texts, vectorCfg, { signal: vectorAbortController.signal }); - const items = batch.map((p, idx) => ({ - eventId: p.id, - vector: vectors[idx], - })); - await saveEventVectorsToDb(chatId, items, fingerprint); - l2Completed += batch.length; - postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L2", current: l2Completed, total: l2Pairs.length }); - } catch (e) { - if (e?.name === "AbortError") break; - xbLog.error(MODULE_ID, "L2 向量化失败", e); - vectorCancelled = true; - break; - } + + const vectors = await embedWithRetry(texts, "L2", l2Completed, l2Pairs.length); + if (!vectors) break; // cancelled + + const items = batch.map((p, idx) => ({ + eventId: p.id, + vector: vectors[idx], + })); + await saveEventVectorsToDb(chatId, items, fingerprint); + l2Completed += batch.length; + postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L2", current: l2Completed, total: l2Pairs.length }); } } @@ -598,7 +627,9 @@ async function handleClearVectors() { await clearEventVectors(chatId); await clearAllChunks(chatId); await clearStateVectors(chatId); - await updateMeta(chatId, { lastChunkFloor: -1 }); + // Reset both boundary and fingerprint so next incremental build starts from floor 0 + // without being blocked by stale engine fingerprint mismatch. + await updateMeta(chatId, { lastChunkFloor: -1, fingerprint: null }); await sendVectorStatsToFrame(); await executeSlashCommand('/echo severity=info 向量数据已清除。如需恢复召回功能,请重新点击"生成向量"。'); xbLog.info(MODULE_ID, "向量数据已清除"); @@ -1138,7 +1169,7 @@ function updateFrameStatsAfterSummary(endMesId, merged) { // iframe 消息处理 // ═══════════════════════════════════════════════════════════════════════════ -function handleFrameMessage(event) { +async function handleFrameMessage(event) { const iframe = document.getElementById("xiaobaix-story-summary-iframe"); if (!isTrustedMessage(event, iframe, "LittleWhiteBox-StoryFrame")) return; @@ -1193,7 +1224,7 @@ function handleFrameMessage(event) { break; case "VECTOR_CLEAR": - handleClearVectors(); + await handleClearVectors(); break; case "VECTOR_CANCEL_GENERATE": @@ -1204,11 +1235,11 @@ function handleFrameMessage(event) { break; case "ANCHOR_GENERATE": - handleAnchorGenerate(); + await handleAnchorGenerate(); break; case "ANCHOR_CLEAR": - handleAnchorClear(); + await handleAnchorClear(); break; case "ANCHOR_CANCEL": diff --git a/modules/story-summary/vector/llm/llm-service.js b/modules/story-summary/vector/llm/llm-service.js index 537f9eb..13ec391 100644 --- a/modules/story-summary/vector/llm/llm-service.js +++ b/modules/story-summary/vector/llm/llm-service.js @@ -3,6 +3,7 @@ // ═══════════════════════════════════════════════════════════════════════════ import { xbLog } from '../../../../core/debug-core.js'; import { getVectorConfig } from '../../data/config.js'; +import { getApiKey } from './siliconflow.js'; const MODULE_ID = 'vector-llm-service'; const SILICONFLOW_API_URL = 'https://api.siliconflow.cn/v1'; @@ -40,8 +41,7 @@ export async function callLLM(messages, options = {}) { const mod = getStreamingModule(); if (!mod) throw new Error('Streaming module not ready'); - const cfg = getVectorConfig(); - const apiKey = cfg?.online?.key || ''; + const apiKey = getApiKey() || ''; if (!apiKey) { throw new Error('L0 requires siliconflow API key'); } diff --git a/modules/story-summary/vector/llm/siliconflow.js b/modules/story-summary/vector/llm/siliconflow.js index bebdc74..1a7bb7d 100644 --- a/modules/story-summary/vector/llm/siliconflow.js +++ b/modules/story-summary/vector/llm/siliconflow.js @@ -1,21 +1,63 @@ // ═══════════════════════════════════════════════════════════════════════════ -// siliconflow.js - 仅保留 Embedding +// siliconflow.js - Embedding + 多 Key 轮询 +// +// 在 API Key 输入框中用逗号、分号、竖线或换行分隔多个 Key,例如: +// sk-aaa,sk-bbb,sk-ccc +// 每次调用自动轮询到下一个 Key,并发请求会均匀分布到所有 Key 上。 // ═══════════════════════════════════════════════════════════════════════════ const BASE_URL = 'https://api.siliconflow.cn'; const EMBEDDING_MODEL = 'BAAI/bge-m3'; -export function getApiKey() { +// ★ 多 Key 轮询状态 +let _keyIndex = 0; + +/** + * 从 localStorage 解析所有 Key(支持逗号、分号、竖线、换行分隔) + */ +function parseKeys() { try { const raw = localStorage.getItem('summary_panel_config'); if (raw) { const parsed = JSON.parse(raw); - return parsed.vector?.online?.key || null; + const keyStr = parsed.vector?.online?.key || ''; + return keyStr + .split(/[,;|\n]+/) + .map(k => k.trim()) + .filter(k => k.length > 0); } } catch { } - return null; + return []; } +/** + * 获取下一个可用的 API Key(轮询) + * 每次调用返回不同的 Key,自动循环 + */ +export function getApiKey() { + const keys = parseKeys(); + if (!keys.length) return null; + if (keys.length === 1) return keys[0]; + + const idx = _keyIndex % keys.length; + const key = keys[idx]; + _keyIndex = (_keyIndex + 1) % keys.length; + const masked = key.length > 10 ? key.slice(0, 6) + '***' + key.slice(-4) : '***'; + console.log(`[SiliconFlow] 使用 Key ${idx + 1}/${keys.length}: ${masked}`); + return key; +} + +/** + * 获取当前配置的 Key 数量(供外部模块动态调整并发用) + */ +export function getKeyCount() { + return Math.max(1, parseKeys().length); +} + +// ═══════════════════════════════════════════════════════════════════════════ +// Embedding +// ═══════════════════════════════════════════════════════════════════════════ + export async function embed(texts, options = {}) { if (!texts?.length) return []; diff --git a/modules/story-summary/vector/pipeline/state-integration.js b/modules/story-summary/vector/pipeline/state-integration.js index 2838301..bd0516b 100644 --- a/modules/story-summary/vector/pipeline/state-integration.js +++ b/modules/story-summary/vector/pipeline/state-integration.js @@ -181,14 +181,83 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options // ★ Phase 1: 收集所有新提取的 atoms(不向量化) const allNewAtoms = []; - // ★ 30 并发批次处理 - // 并发池处理(保持固定并发度) + // ★ 限流检测:连续失败 N 次后暂停并降速 + let consecutiveFailures = 0; + let rateLimited = false; + const RATE_LIMIT_THRESHOLD = 3; // 连续失败多少次触发限流保护 + const RATE_LIMIT_WAIT_MS = 60000; // 限流后等待时间(60 秒) + const RETRY_INTERVAL_MS = 1000; // 降速模式下每次请求间隔(1 秒) + const RETRY_CONCURRENCY = 1; // ★ 降速模式下的并发数(默认1,建议不要超过5) + + // ★ 通用处理单个 pair 的逻辑(复用于正常模式和降速模式) + const processPair = async (pair, idx, workerId) => { + const floor = pair.aiFloor; + const prev = getL0FloorStatus(floor); + + active++; + if (active > peakActive) peakActive = active; + if (DEBUG_CONCURRENCY && (idx % 10 === 0)) { + xbLog.info(MODULE_ID, `L0 pool start idx=${idx} active=${active} peak=${peakActive} worker=${workerId}`); + } + + try { + const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 }); + + if (extractionCancelled) return; + + if (atoms == null) { + throw new Error('llm_failed'); + } + + // ★ 成功:重置连续失败计数 + consecutiveFailures = 0; + + if (!atoms.length) { + setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 }); + } else { + atoms.forEach(a => a.chatId = chatId); + saveStateAtoms(atoms); + allNewAtoms.push(...atoms); + + setL0FloorStatus(floor, { status: 'ok', atoms: atoms.length }); + builtAtoms += atoms.length; + } + } catch (e) { + if (extractionCancelled) return; + + setL0FloorStatus(floor, { + status: 'fail', + attempts: (prev?.attempts || 0) + 1, + reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120), + }); + failed++; + + // ★ 限流检测:连续失败累加 + consecutiveFailures++; + if (consecutiveFailures >= RATE_LIMIT_THRESHOLD && !rateLimited) { + rateLimited = true; + xbLog.warn(MODULE_ID, `连续失败 ${consecutiveFailures} 次,疑似触发 API 限流,将暂停所有并发`); + } + } finally { + active--; + if (!extractionCancelled) { + completed++; + onProgress?.(`提取: ${completed}/${total}`, completed, total); + } + if (DEBUG_CONCURRENCY && (completed % 25 === 0 || completed === total)) { + const elapsed = Math.max(1, Math.round(performance.now() - tStart)); + xbLog.info(MODULE_ID, `L0 pool progress=${completed}/${total} active=${active} peak=${peakActive} elapsedMs=${elapsed}`); + } + } + }; + + // ★ 并发池处理(保持固定并发度) const poolSize = Math.min(CONCURRENCY, pendingPairs.length); let nextIndex = 0; let started = 0; const runWorker = async (workerId) => { while (true) { - if (extractionCancelled) return; + if (extractionCancelled || rateLimited) return; const idx = nextIndex++; if (idx >= pendingPairs.length) return; @@ -198,57 +267,9 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options await new Promise(r => setTimeout(r, stagger * STAGGER_DELAY)); } - if (extractionCancelled) return; + if (extractionCancelled || rateLimited) return; - const floor = pair.aiFloor; - const prev = getL0FloorStatus(floor); - - active++; - if (active > peakActive) peakActive = active; - if (DEBUG_CONCURRENCY && (idx % 10 === 0)) { - xbLog.info(MODULE_ID, `L0 pool start idx=${idx} active=${active} peak=${peakActive} worker=${workerId}`); - } - - try { - const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 }); - - if (extractionCancelled) return; - - if (atoms == null) { - throw new Error('llm_failed'); - } - - if (!atoms.length) { - setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 }); - } else { - atoms.forEach(a => a.chatId = chatId); - saveStateAtoms(atoms); - // Phase 1: 只收集,不向量化 - allNewAtoms.push(...atoms); - - setL0FloorStatus(floor, { status: 'ok', atoms: atoms.length }); - builtAtoms += atoms.length; - } - } catch (e) { - if (extractionCancelled) return; - - setL0FloorStatus(floor, { - status: 'fail', - attempts: (prev?.attempts || 0) + 1, - reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120), - }); - failed++; - } finally { - active--; - if (!extractionCancelled) { - completed++; - onProgress?.(`提取: ${completed}/${total}`, completed, total); - } - if (DEBUG_CONCURRENCY && (completed % 25 === 0 || completed === total)) { - const elapsed = Math.max(1, Math.round(performance.now() - tStart)); - xbLog.info(MODULE_ID, `L0 pool progress=${completed}/${total} active=${active} peak=${peakActive} elapsedMs=${elapsed}`); - } - } + await processPair(pair, idx, workerId); } }; @@ -258,6 +279,61 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options xbLog.info(MODULE_ID, `L0 pool done completed=${completed}/${total} failed=${failed} peakActive=${peakActive} elapsedMs=${elapsed}`); } + // ═════════════════════════════════════════════════════════════════════ + // ★ 限流恢复:重置进度,从头开始以限速模式慢慢跑 + // ═════════════════════════════════════════════════════════════════════ + if (rateLimited && !extractionCancelled) { + const waitSec = RATE_LIMIT_WAIT_MS / 1000; + xbLog.info(MODULE_ID, `限流保护:将重置进度并从头开始降速重来(并发=${RETRY_CONCURRENCY}, 间隔=${RETRY_INTERVAL_MS}ms)`); + onProgress?.(`疑似限流,${waitSec}s 后降速重头开始...`, completed, total); + + await new Promise(r => setTimeout(r, RATE_LIMIT_WAIT_MS)); + + if (!extractionCancelled) { + // ★ 核心逻辑:重置计数器,让 UI 从 0 开始跑,给用户“重头开始”的反馈 + rateLimited = false; + consecutiveFailures = 0; + completed = 0; + failed = 0; + + let retryNextIdx = 0; + + xbLog.info(MODULE_ID, `限流恢复:开始降速模式扫描 ${pendingPairs.length} 个楼层`); + + const retryWorkers = Math.min(RETRY_CONCURRENCY, pendingPairs.length); + const runRetryWorker = async (wid) => { + while (true) { + if (extractionCancelled) return; + const idx = retryNextIdx++; + if (idx >= pendingPairs.length) return; + + const pair = pendingPairs[idx]; + const floor = pair.aiFloor; + + // ★ 检查该楼层状态 + const st = getL0FloorStatus(floor); + if (st?.status === 'ok' || st?.status === 'empty') { + // 刚才已经成功了,直接跳过(仅增加进度计数) + completed++; + onProgress?.(`提取: ${completed}/${total} (跳过已完成)`, completed, total); + continue; + } + + // ★ 没做过的,用 slow 模式处理 + await processPair(pair, idx, `retry-${wid}`); + + // 每个请求后休息,避免再次触发限流 + if (idx < pendingPairs.length - 1 && RETRY_INTERVAL_MS > 0) { + await new Promise(r => setTimeout(r, RETRY_INTERVAL_MS)); + } + } + }; + + await Promise.all(Array.from({ length: retryWorkers }, (_, i) => runRetryWorker(i))); + xbLog.info(MODULE_ID, `降速重头开始阶段结束`); + } + } + try { saveMetadataDebounced?.(); } catch { } diff --git a/modules/story-summary/vector/retrieval/recall.js b/modules/story-summary/vector/retrieval/recall.js index 3484c04..b049e32 100644 --- a/modules/story-summary/vector/retrieval/recall.js +++ b/modules/story-summary/vector/retrieval/recall.js @@ -60,7 +60,7 @@ const CONFIG = { // Event (L2 Events) EVENT_CANDIDATE_MAX: 100, EVENT_SELECT_MAX: 50, - EVENT_MIN_SIMILARITY: 0.55, + EVENT_MIN_SIMILARITY: 0.60, EVENT_MMR_LAMBDA: 0.72, EVENT_ENTITY_BYPASS_SIM: 0.70, @@ -79,7 +79,7 @@ const CONFIG = { // Rerank(floor-level) RERANK_TOP_N: 20, - RERANK_MIN_SCORE: 0.15, + RERANK_MIN_SCORE: 0.10, // 因果链 CAUSAL_CHAIN_MAX_DEPTH: 10,