chore: update story summary and lint fixes

2026-02-08 12:22:45 +08:00
parent 56e30bfe02
commit d3d818da6a
15 changed files with 2479 additions and 852 deletions
--- a/modules/story-summary/vector/pipeline/state-integration.js
+++ b/modules/story-summary/vector/pipeline/state-integration.js
@@ -1,9 +1,11 @@
-// ============================================================================
-// state-integration.js - L0 记忆锚点管理
-// 支持增量提取、清空、取消
+// ============================================================================
+// state-integration.js - L0 状态层集成
+// Phase 1: 批量 LLM 提取（只存文本）
+// Phase 2: 统一向量化（提取完成后）
 // ============================================================================

 import { getContext } from '../../../../../../../extensions.js';
+import { saveMetadataDebounced } from '../../../../../../../extensions.js';
 import { xbLog } from '../../../../core/debug-core.js';
 import {
    saveStateAtoms,
@@ -26,9 +28,15 @@ import { filterText } from '../utils/text-filter.js';

 const MODULE_ID = 'state-integration';

+// ★ 并发配置
+const CONCURRENCY = 30;
+const STAGGER_DELAY = 30;
+
 let initialized = false;
+let extractionCancelled = false;

 export function cancelL0Extraction() {
+    extractionCancelled = true;
    cancelBatchExtraction();
 }

@@ -53,6 +61,7 @@ export async function getAnchorStats() {
        return { extracted: 0, total: 0, pending: 0, empty: 0, fail: 0 };
    }

+    // 统计 AI 楼层
    const aiFloors = [];
    for (let i = 0; i < chat.length; i++) {
        if (!chat[i]?.is_user) aiFloors.push(i);
@@ -71,14 +80,20 @@ export async function getAnchorStats() {
    }

    const total = aiFloors.length;
-    const completed = ok + empty;
-    const pending = Math.max(0, total - completed);
+    const processed = ok + empty + fail;
+    const pending = Math.max(0, total - processed);

-    return { extracted: completed, total, pending, empty, fail };
+    return {
+        extracted: ok + empty,
+        total,
+        pending,
+        empty,
+        fail
+    };
 }

 // ============================================================================
-// 增量提取
+// 增量提取 - Phase 1 提取文本，Phase 2 统一向量化
 // ============================================================================

 function buildL0InputText(userMessage, aiMessage) {
@@ -102,6 +117,9 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress) {
    const vectorCfg = getVectorConfig();
    if (!vectorCfg?.enabled) return { built: 0 };

+    // ★ 重置取消标志
+    extractionCancelled = false;
+
    const pendingPairs = [];

    for (let i = 0; i < chat.length; i++) {
@@ -109,6 +127,7 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress) {
        if (!msg || msg.is_user) continue;

        const st = getL0FloorStatus(i);
+        // ★ 只跳过 ok 和 empty，fail 的可以重试
        if (st?.status === 'ok' || st?.status === 'empty') {
            continue;
        }
@@ -125,54 +144,109 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress) {
    }

    if (!pendingPairs.length) {
-        onProgress?.(0, 0, '已全部提取');
+        onProgress?.('已全部提取', 0, 0);
        return { built: 0 };
    }

-    xbLog.info(MODULE_ID, `增量 L0 提取：pending=${pendingPairs.length}`);
+    xbLog.info(MODULE_ID, `增量 L0 提取：pending=${pendingPairs.length}, concurrency=${CONCURRENCY}`);

    let completed = 0;
+    let failed = 0;
    const total = pendingPairs.length;
    let builtAtoms = 0;

-    for (const pair of pendingPairs) {
-        const floor = pair.aiFloor;
-        const prev = getL0FloorStatus(floor);
+    // ★ Phase 1: 收集所有新提取的 atoms（不向量化）
+    const allNewAtoms = [];

-        try {
-            const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 });
+    // ★ 30 并发批次处理
+    for (let i = 0; i < pendingPairs.length; i += CONCURRENCY) {
+        // ★ 检查取消
+        if (extractionCancelled) {
+            xbLog.info(MODULE_ID, `用户取消，已完成 ${completed}/${total}`);
+            break;
+        }

-            if (atoms == null) {
-                throw new Error('llm_failed');
+        const batch = pendingPairs.slice(i, i + CONCURRENCY);
+
+        const promises = batch.map((pair, idx) => (async () => {
+            // 首批错开启动，避免瞬间打满
+            if (i === 0) {
+                await new Promise(r => setTimeout(r, idx * STAGGER_DELAY));
            }

-            if (!atoms.length) {
-                setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
-            } else {
-                atoms.forEach(a => a.chatId = chatId);
-                saveStateAtoms(atoms);
-                await vectorizeAtoms(chatId, atoms);
+            // 再次检查取消
+            if (extractionCancelled) return;

-                setL0FloorStatus(floor, { status: 'ok', atoms: atoms.length });
-                builtAtoms += atoms.length;
+            const floor = pair.aiFloor;
+            const prev = getL0FloorStatus(floor);
+
+            try {
+                const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 });
+
+                if (extractionCancelled) return;
+
+                if (atoms == null) {
+                    throw new Error('llm_failed');
+                }
+
+                if (!atoms.length) {
+                    setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
+                } else {
+                    atoms.forEach(a => a.chatId = chatId);
+                    saveStateAtoms(atoms);
+                    // ★ Phase 1: 只收集，不向量化
+                    allNewAtoms.push(...atoms);
+
+                    setL0FloorStatus(floor, { status: 'ok', atoms: atoms.length });
+                    builtAtoms += atoms.length;
+                }
+            } catch (e) {
+                if (extractionCancelled) return;
+
+                setL0FloorStatus(floor, {
+                    status: 'fail',
+                    attempts: (prev?.attempts || 0) + 1,
+                    reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120),
+                });
+                failed++;
+            } finally {
+                if (!extractionCancelled) {
+                    completed++;
+                    onProgress?.(`提取: ${completed}/${total}`, completed, total);
+                }
            }
-        } catch (e) {
-            setL0FloorStatus(floor, {
-                status: 'fail',
-                attempts: (prev?.attempts || 0) + 1,
-                reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120),
-            });
-        } finally {
-            completed++;
-            onProgress?.(`L0: ${completed}/${total}`, completed, total);
+        })());
+
+        await Promise.all(promises);
+
+        // 批次间短暂间隔
+        if (i + CONCURRENCY < pendingPairs.length && !extractionCancelled) {
+            await new Promise(r => setTimeout(r, 30));
        }
    }

-    xbLog.info(MODULE_ID, `增量 L0 完成：atoms=${builtAtoms}, floors=${pendingPairs.length}`);
+    // ★ 立即保存文本，不要等防抖
+    try {
+        saveMetadataDebounced?.();
+    } catch { }
+
+    // ★ Phase 2: 统一向量化所有新提取的 atoms
+    if (allNewAtoms.length > 0 && !extractionCancelled) {
+        onProgress?.(`向量化 L0: 0/${allNewAtoms.length}`, 0, allNewAtoms.length);
+        await vectorizeAtoms(chatId, allNewAtoms, (current, total) => {
+            onProgress?.(`向量化 L0: ${current}/${total}`, current, total);
+        });
+    }
+
+    xbLog.info(MODULE_ID, `L0 ${extractionCancelled ? '已取消' : '完成'}：atoms=${builtAtoms}, completed=${completed}/${total}, failed=${failed}`);
    return { built: builtAtoms };
 }

-async function vectorizeAtoms(chatId, atoms) {
+// ============================================================================
+// 向量化（支持进度回调）
+// ============================================================================
+
+async function vectorizeAtoms(chatId, atoms, onProgress) {
    if (!atoms?.length) return;

    const vectorCfg = getVectorConfig();
@@ -180,14 +254,27 @@ async function vectorizeAtoms(chatId, atoms) {

    const texts = atoms.map(a => a.semantic);
    const fingerprint = getEngineFingerprint(vectorCfg);
+    const batchSize = 20;

    try {
-        const vectors = await embed(texts, { timeout: 30000 });
+        const allVectors = [];

-        const items = atoms.map((a, i) => ({
+        for (let i = 0; i < texts.length; i += batchSize) {
+            if (extractionCancelled) break;
+
+            const batch = texts.slice(i, i + batchSize);
+            const vectors = await embed(batch, { timeout: 30000 });
+            allVectors.push(...vectors);
+
+            onProgress?.(allVectors.length, texts.length);
+        }
+
+        if (extractionCancelled) return;
+
+        const items = atoms.slice(0, allVectors.length).map((a, i) => ({
            atomId: a.atomId,
            floor: a.floor,
-            vector: vectors[i],
+            vector: allVectors[i],
        }));

        await saveStateVectors(chatId, items, fingerprint);
@@ -207,11 +294,17 @@ export async function clearAllAtomsAndVectors(chatId) {
    if (chatId) {
        await clearStateVectors(chatId);
    }
+
+    // ★ 立即保存
+    try {
+        saveMetadataDebounced?.();
+    } catch { }
+
    xbLog.info(MODULE_ID, '已清空所有记忆锚点');
 }

 // ============================================================================
-// 实时增量（AI 消息后触发）- 保留原有逻辑
+// 实时增量（AI 消息后触发）- 保持不变
 // ============================================================================

 let extractionQueue = [];
@@ -245,7 +338,9 @@ async function processQueue() {

            atoms.forEach(a => a.chatId = chatId);
            saveStateAtoms(atoms);
-            await vectorizeAtoms(chatId, atoms);
+
+            // 单楼实时处理：立即向量化
+            await vectorizeAtomsSimple(chatId, atoms);

            xbLog.info(MODULE_ID, `floor ${aiFloor}: ${atoms.length} atoms 已存储`);
        } catch (e) {
@@ -256,6 +351,31 @@ async function processQueue() {
    isProcessing = false;
 }

+// 简单向量化（无进度回调，用于单楼实时处理）
+async function vectorizeAtomsSimple(chatId, atoms) {
+    if (!atoms?.length) return;
+
+    const vectorCfg = getVectorConfig();
+    if (!vectorCfg?.enabled) return;
+
+    const texts = atoms.map(a => a.semantic);
+    const fingerprint = getEngineFingerprint(vectorCfg);
+
+    try {
+        const vectors = await embed(texts, { timeout: 30000 });
+
+        const items = atoms.map((a, i) => ({
+            atomId: a.atomId,
+            floor: a.floor,
+            vector: vectors[i],
+        }));
+
+        await saveStateVectors(chatId, items, fingerprint);
+    } catch (e) {
+        xbLog.error(MODULE_ID, 'L0 向量化失败', e);
+    }
+}
+
 // ============================================================================
 // 回滚钩子
 // ============================================================================
@@ -301,7 +421,7 @@ export async function rebuildStateVectors(chatId, vectorCfg) {
    xbLog.info(MODULE_ID, `重建 L0 向量: ${atoms.length} 条 atom`);

    await clearStateVectors(chatId);
-    await vectorizeAtoms(chatId, atoms);
+    await vectorizeAtomsSimple(chatId, atoms);

    return { built: atoms.length };
 }
--- a/modules/story-summary/vector/pipeline/state-recall.js
+++ b/modules/story-summary/vector/pipeline/state-recall.js
@@ -131,16 +131,44 @@ export function stateToVirtualChunks(l0Results) {
 // ═══════════════════════════════════════════════════════════════════════════

 /**
- * 合并 L0 和 L1 chunks，每楼层最多保留 limit 条
- * @param {Array} l0Chunks - 虚拟 chunks（已按相似度排序）
- * @param {Array} l1Chunks - 真实 chunks（已按相似度排序）
+ * 合并 L0 和 L1 chunks
+ * @param {Array} l0Chunks - L0 虚拟 chunks（带 similarity）
+ * @param {Array} l1Chunks - L1 真实 chunks（无 similarity）
 * @param {number} limit - 每楼层上限
 * @returns {Array} 合并后的 chunks
 */
 export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
+    // 构建 L0 楼层 → 最高 similarity 映射
+    const floorSimilarity = new Map();
+    for (const c of (l0Chunks || [])) {
+        const existing = floorSimilarity.get(c.floor) || 0;
+        if ((c.similarity || 0) > existing) {
+            floorSimilarity.set(c.floor, c.similarity || 0);
+        }
+    }
+
+    // L1 继承所属楼层的 L0 similarity
+    const l1WithScore = (l1Chunks || []).map(c => ({
+        ...c,
+        similarity: floorSimilarity.get(c.floor) || 0.5,
+    }));
+
    // 合并并按相似度排序
-    const all = [...(l0Chunks || []), ...(l1Chunks || [])]
-        .sort((a, b) => b.similarity - a.similarity);
+    const all = [...(l0Chunks || []), ...l1WithScore]
+        .sort((a, b) => {
+            // 相似度优先
+            const simDiff = (b.similarity || 0) - (a.similarity || 0);
+            if (Math.abs(simDiff) > 0.01) return simDiff;
+
+            // 同楼层：L0 优先于 L1
+            if (a.floor === b.floor) {
+                if (a.isL0 && !b.isL0) return -1;
+                if (!a.isL0 && b.isL0) return 1;
+            }
+
+            // 按楼层升序
+            return a.floor - b.floor;
+        });

    // 每楼层稀疏去重
    const byFloor = new Map();
@@ -153,8 +181,9 @@ export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
        }
    }

-    // 扁平化并保持相似度排序
+    // 扁平化并保持排序
    return Array.from(byFloor.values())
        .flat()
-        .sort((a, b) => b.similarity - a.similarity);
+        .sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
 }
+