Add L0 index and anchor UI updates

2026-02-06 11:22:02 +08:00
parent c36efe6805
commit 44ca06f9b9
23 changed files with 1749 additions and 3898 deletions
--- a/modules/story-summary/vector/pipeline/chunk-builder.js
+++ b/modules/story-summary/vector/pipeline/chunk-builder.js
@@ -1,4 +1,4 @@
-// ═══════════════════════════════════════════════════════════════════════════
+// ═══════════════════════════════════════════════════════════════════════════
 // Story Summary - Chunk Builder
 // 标准 RAG chunking: ~200 tokens per chunk
 // ═══════════════════════════════════════════════════════════════════════════
@@ -19,6 +19,7 @@ import {
 import { embed, getEngineFingerprint } from '../utils/embedder.js';
 import { xbLog } from '../../../../core/debug-core.js';
 import { filterText } from '../utils/text-filter.js';
+import { extractAndStoreAtomsForRound } from './state-integration.js';

 const MODULE_ID = 'chunk-builder';

@@ -201,8 +202,7 @@ export async function buildAllChunks(options = {}) {
    await saveChunks(chatId, allChunks);

    const texts = allChunks.map(c => c.text);
-    const isLocal = vectorConfig.engine === 'local';
-    const batchSize = isLocal ? 5 : 20;
+    const batchSize = 20;

    let completed = 0;
    let errors = 0;
@@ -302,6 +302,7 @@ export async function buildIncrementalChunks(options = {}) {
    }
 }

+
 // ═══════════════════════════════════════════════════════════════════════════
 // L1 同步（消息变化时调用）
 // ═══════════════════════════════════════════════════════════════════════════
@@ -337,13 +338,6 @@ export async function syncOnMessageReceived(chatId, lastFloor, message, vectorCo
    if (!chatId || lastFloor < 0 || !message) return;
    if (!vectorConfig?.enabled) return;

-    // 本地模型未加载时跳过（避免意外触发下载或报错）
-    if (vectorConfig.engine === "local") {
-        const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("../utils/embedder.js");
-        const modelId = vectorConfig.local?.modelId || DEFAULT_LOCAL_MODEL;
-        if (!isLocalModelLoaded(modelId)) return;
-    }
-
    // 删除该楼层旧的
    await deleteChunksAtFloor(chatId, lastFloor);

@@ -367,4 +361,18 @@ export async function syncOnMessageReceived(chatId, lastFloor, message, vectorCo
    } catch (e) {
        xbLog.error(MODULE_ID, `消息同步失败：floor ${lastFloor}`, e);
    }
+    // L0 配对提取（仅 AI 消息触发）
+    if (!message.is_user) {
+        const { chat } = getContext();
+        const userFloor = lastFloor - 1;
+        const userMessage = (userFloor >= 0 && chat[userFloor]?.is_user) ? chat[userFloor] : null;
+
+        try {
+            await extractAndStoreAtomsForRound(lastFloor, message, userMessage);
+        } catch (e) {
+            xbLog.warn(MODULE_ID, `Atom 提取失败: floor ${lastFloor}`, e);
+        }
+    }
 }
+
+
--- a/modules/story-summary/vector/pipeline/state-integration.js
+++ b/modules/story-summary/vector/pipeline/state-integration.js
@@ -1,7 +1,7 @@
-// ═══════════════════════════════════════════════════════════════════════════
-// Story Summary - State Integration (L0)
-// 事件监听 + 回滚钩子注册
-// ═══════════════════════════════════════════════════════════════════════════
+// ============================================================================
+// state-integration.js - L0 记忆锚点管理
+// 支持增量提取、清空、取消
+// ============================================================================

 import { getContext } from '../../../../../../../extensions.js';
 import { xbLog } from '../../../../core/debug-core.js';
@@ -11,70 +11,174 @@ import {
    deleteStateAtomsFromFloor,
    deleteStateVectorsFromFloor,
    getStateAtoms,
+    clearStateAtoms,
    clearStateVectors,
+    getL0FloorStatus,
+    setL0FloorStatus,
+    clearL0Index,
+    deleteL0IndexFromFloor,
 } from '../storage/state-store.js';
-import { embed, getEngineFingerprint } from '../utils/embedder.js';
+import { embed } from '../llm/siliconflow.js';
+import { extractAtomsForRound, cancelBatchExtraction } from '../llm/atom-extraction.js';
 import { getVectorConfig } from '../../data/config.js';
+import { getEngineFingerprint } from '../utils/embedder.js';
+import { filterText } from '../utils/text-filter.js';

 const MODULE_ID = 'state-integration';

 let initialized = false;

-// ═══════════════════════════════════════════════════════════════════════════
+/**
+ * Request cancellation of L0 extraction.
+ *
+ * Thin public wrapper: delegates to cancelBatchExtraction() imported from
+ * ../llm/atom-extraction.js and returns nothing.
+ */
+export function cancelL0Extraction() {
+    cancelBatchExtraction();
+}
+
+// ============================================================================
 // 初始化
-// ═══════════════════════════════════════════════════════════════════════════
+// ============================================================================

+/**
+ * One-time setup of the L0 state integration.
+ *
+ * Guarded by the module-level `initialized` flag, so repeated calls are no-ops.
+ * Installs handleStateRollback as globalThis.LWB_StateRollbackHook and logs
+ * that the integration is ready.
+ */
 export function initStateIntegration() {
    if (initialized) return;
    initialized = true;
-
-    // 监听变量团队的事件
-    $(document).on('xiaobaix:variables:stateAtomsGenerated', handleStateAtomsGenerated);
-
-    // 注册回滚钩子
    globalThis.LWB_StateRollbackHook = handleStateRollback;
-
    xbLog.info(MODULE_ID, 'L0 状态层集成已初始化');
 }

-// ═══════════════════════════════════════════════════════════════════════════
-// 事件处理
-// ═══════════════════════════════════════════════════════════════════════════
+// ============================================================================
+// 统计
+// ============================================================================

-async function handleStateAtomsGenerated(e, data) {
-    const { atoms } = data || {};
-    if (!atoms?.length) return;
-
-    const { chatId } = getContext();
-    if (!chatId) return;
-
-    const validAtoms = atoms.filter(a => a?.chatId === chatId);
-    if (!validAtoms.length) {
-        xbLog.warn(MODULE_ID, `atoms.chatId 不匹配，期望 ${chatId}，跳过`);
-        return;
+/**
+ * Summarize L0 extraction progress for the current chat.
+ *
+ * Every chat index whose message is not flagged `is_user` is treated as an AI
+ * floor; the recorded status of each such floor is read via getL0FloorStatus().
+ *
+ * @returns {Promise<{extracted: number, total: number, pending: number, empty: number, fail: number}>}
+ *   `total` is the number of AI floors, `extracted` counts floors whose status
+ *   is 'ok' or 'empty', `pending` is `total - extracted` (never negative), and
+ *   `empty` / `fail` count floors with those statuses.
+ */
+export async function getAnchorStats() {
+    const { chat } = getContext();
+    if (!chat?.length) {
+        return { extracted: 0, total: 0, pending: 0, empty: 0, fail: 0 };
    }

-    xbLog.info(MODULE_ID, `收到 ${validAtoms.length} 个 StateAtom`);
-
-    // 1. 存入 chat_metadata（持久化）
-    saveStateAtoms(validAtoms);
-
-    // 2. 向量化并存入 IndexedDB
-    const vectorCfg = getVectorConfig();
-    if (!vectorCfg?.enabled) {
-        xbLog.info(MODULE_ID, '向量未启用，跳过 L0 向量化');
-        return;
+    const aiFloors = [];
+    for (let i = 0; i < chat.length; i++) {
+        if (!chat[i]?.is_user) aiFloors.push(i);
    }

-    await vectorizeAtoms(chatId, validAtoms, vectorCfg);
+    let ok = 0;
+    let empty = 0;
+    let fail = 0;
+
+    for (const f of aiFloors) {
+        const s = getL0FloorStatus(f);
+        // Floors with no recorded status are not counted here; they end up in `pending`.
+        if (!s) continue;
+        if (s.status === 'ok') ok++;
+        else if (s.status === 'empty') empty++;
+        else if (s.status === 'fail') fail++;
+    }
+
+    const total = aiFloors.length;
+    // 'empty' floors count as completed, so failed floors remain part of `pending`.
+    const completed = ok + empty;
+    const pending = Math.max(0, total - completed);
+
+    return { extracted: completed, total, pending, empty, fail };
 }

-async function vectorizeAtoms(chatId, atoms, vectorCfg) {
+// ============================================================================
+// 增量提取
+// ============================================================================
+
+/**
+ * Build the filtered text of one user/AI round.
+ *
+ * Each message body is passed through filterText() and trimmed; a side is
+ * included only when its *filtered* text is non-empty. A message that filters
+ * down to nothing therefore contributes no header, and the function returns
+ * '' when both sides are empty, which lets the caller detect a
+ * filtered-empty round.
+ *
+ * @param {object|null|undefined} userMessage - Message read for `name` and `mes`; may be absent.
+ * @param {object|null|undefined} aiMessage - Message read for `name` and `mes`; may be absent.
+ * @returns {string} The labelled parts joined by a `---` separator, or '' when nothing remains.
+ */
+function buildL0InputText(userMessage, aiMessage) {
+    const parts = [];
+    const userName = userMessage?.name || '用户';
+    const aiName = aiMessage?.name || '角色';

+    // Filter before testing for emptiness: checking the raw `mes` would emit a
+    // header-only part for a message whose content is removed entirely by filterText().
+    const userText = userMessage?.mes?.trim() ? filterText(userMessage.mes).trim() : '';
+    const aiText = aiMessage?.mes?.trim() ? filterText(aiMessage.mes).trim() : '';

+    if (userText) {
+        parts.push(`【用户：${userName}】\n${userText}`);
+    }
+    if (aiText) {
+        parts.push(`【角色：${aiName}】\n${aiText}`);
+    }

+    return parts.join('\n\n---\n\n').trim();
+}
+
+/**
+ * Incrementally extract L0 atoms for the AI floors that are not finished yet.
+ *
+ * A floor is skipped when its recorded status is 'ok' or 'empty'; floors with
+ * status 'fail' or with no status are (re)tried. Each AI message is paired with
+ * the message directly before it when that message is a user message.
+ *
+ * @param {string} chatId - Chat id stamped onto every atom and passed to vectorizeAtoms().
+ * @param {Array<object>} chat - Chat messages; the array index is used as the floor number.
+ * @param {(message: string, current: number, total: number) => void} [onProgress]
+ *   Optional progress callback, always invoked as (message, current, total).
+ * @returns {Promise<{built: number}>} Number of atoms stored by this run.
+ */
+export async function incrementalExtractAtoms(chatId, chat, onProgress) {
+    if (!chatId || !chat?.length) return { built: 0 };

+    const vectorCfg = getVectorConfig();
+    if (!vectorCfg?.enabled) return { built: 0 };

+    const pendingPairs = [];

+    for (let i = 0; i < chat.length; i++) {
+        const msg = chat[i];
+        if (!msg || msg.is_user) continue;

+        const st = getL0FloorStatus(i);
+        if (st?.status === 'ok' || st?.status === 'empty') {
+            continue;
+        }

+        const userMsg = (i > 0 && chat[i - 1]?.is_user) ? chat[i - 1] : null;
+        const inputText = buildL0InputText(userMsg, msg);

+        // Nothing to send to the LLM: record the floor as empty so it is not retried.
+        if (!inputText) {
+            setL0FloorStatus(i, { status: 'empty', reason: 'filtered_empty', atoms: 0 });
+            continue;
+        }

+        pendingPairs.push({ userMsg, aiMsg: msg, aiFloor: i });
+    }

+    if (!pendingPairs.length) {
+        // Same (message, current, total) argument order as the per-floor call in
+        // the loop below; the two calls previously disagreed.
+        onProgress?.('已全部提取', 0, 0);
+        return { built: 0 };
+    }

+    xbLog.info(MODULE_ID, `增量 L0 提取：pending=${pendingPairs.length}`);

+    let completed = 0;
+    const total = pendingPairs.length;
+    let builtAtoms = 0;

+    for (const pair of pendingPairs) {
+        const floor = pair.aiFloor;
+        // Read before the attempt so a failure can increment the previous attempt count.
+        const prev = getL0FloorStatus(floor);

+        try {
+            const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 });

+            if (!atoms?.length) {
+                setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
+            } else {
+                for (const atom of atoms) atom.chatId = chatId;
+                saveStateAtoms(atoms);
+                // NOTE(review): vectorizeAtoms() logs and swallows embedding errors,
+                // so 'ok' below means the atoms were saved, not that vectors exist.
+                await vectorizeAtoms(chatId, atoms);

+                setL0FloorStatus(floor, { status: 'ok', atoms: atoms.length });
+                builtAtoms += atoms.length;
+            }
+        } catch (e) {
+            // Keep the stored reason short and single-line.
+            setL0FloorStatus(floor, {
+                status: 'fail',
+                attempts: (prev?.attempts || 0) + 1,
+                reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120),
+            });
+        } finally {
+            completed++;
+            onProgress?.(`L0: ${completed}/${total}`, completed, total);
+        }
+    }

+    xbLog.info(MODULE_ID, `增量 L0 完成：atoms=${builtAtoms}, floors=${pendingPairs.length}`);
+    return { built: builtAtoms };
+}
+
+async function vectorizeAtoms(chatId, atoms) {
+    if (!atoms?.length) return;
+
+    const vectorCfg = getVectorConfig();
+    if (!vectorCfg?.enabled) return;
+
    const texts = atoms.map(a => a.semantic);
    const fingerprint = getEngineFingerprint(vectorCfg);

    try {
-        const vectors = await embed(texts, vectorCfg);
+        const vectors = await embed(texts, { timeout: 30000 });

        const items = atoms.map((a, i) => ({
            atomId: a.atomId,
@@ -83,34 +187,106 @@ async function vectorizeAtoms(chatId, atoms, vectorCfg) {
        }));

        await saveStateVectors(chatId, items, fingerprint);
-        xbLog.info(MODULE_ID, `L0 向量化完成: ${items.length} 个`);
+        xbLog.info(MODULE_ID, `L0 向量化完成: ${items.length} 条`);
    } catch (e) {
        xbLog.error(MODULE_ID, 'L0 向量化失败', e);
-        // 不阻塞，向量可后续通过"生成向量"重建
    }
 }

-// ═══════════════════════════════════════════════════════════════════════════
+// ============================================================================
+// 清空
+// ============================================================================
+
+/**
+ * Wipe all L0 data: stored atoms, the per-floor L0 index and, when a chat id
+ * is given, that chat's state vectors.
+ *
+ * @param {string} [chatId] - Chat whose vectors are cleared; vector clearing is skipped when falsy.
+ * @returns {Promise<void>}
+ */
+export async function clearAllAtomsAndVectors(chatId) {
+    // Atoms and the floor index are always cleared; only the vector store needs a chat id.
+    clearStateAtoms();
+    clearL0Index();

+    const hasChat = Boolean(chatId);
+    if (hasChat) await clearStateVectors(chatId);

+    xbLog.info(MODULE_ID, '已清空所有记忆锚点');
+}
+
+// ============================================================================
+// 实时增量（AI 消息后触发）- 保留原有逻辑
+// ============================================================================
+
+let extractionQueue = [];
+let isProcessing = false;
+
+/**
+ * Queue one user/AI round for background L0 atom extraction.
+ *
+ * Does nothing when there is no current chat id or when vectors are disabled.
+ * Otherwise the round is appended to extractionQueue together with the chat id
+ * read at enqueue time, and the queue worker is started without being awaited,
+ * so the returned promise resolves before extraction finishes.
+ *
+ * @param {number} aiFloor - Floor index of the AI message.
+ * @param {object} aiMessage - The AI message of the round.
+ * @param {object|null} userMessage - The paired user message, or null.
+ * @returns {Promise<void>}
+ */
+export async function extractAndStoreAtomsForRound(aiFloor, aiMessage, userMessage) {
+    const activeChatId = getContext().chatId;
+    if (!activeChatId) return;

+    const isVectorEnabled = getVectorConfig()?.enabled;
+    if (!isVectorEnabled) return;

+    const job = { aiFloor, aiMessage, userMessage, chatId: activeChatId };
+    extractionQueue.push(job);

+    // Deliberately not awaited: processQueue() drains the queue in the background.
+    processQueue();
+}
+
+/**
+ * Drain extractionQueue one round at a time.
+ *
+ * Only one drain loop runs at once (guarded by `isProcessing`); the flag is
+ * reset in a `finally` so an unexpected throw cannot leave the queue stuck.
+ * For each round the atoms are extracted, stamped with the queued chat id,
+ * saved and vectorized, and the floor's L0 status is recorded as
+ * 'ok' / 'empty' / 'fail' in the same shape incrementalExtractAtoms() uses, so
+ * floors handled here are not reported as pending or extracted a second time.
+ *
+ * Results are dropped when the active chat no longer matches the queued chat
+ * id. saveStateAtoms() and setL0FloorStatus() take no chat id, so they
+ * presumably write to the active chat (verify in state-store.js).
+ *
+ * @returns {Promise<void>}
+ */
+async function processQueue() {
+    if (isProcessing || extractionQueue.length === 0) return;
+    isProcessing = true;

+    try {
+        while (extractionQueue.length > 0) {
+            const { aiFloor, aiMessage, userMessage, chatId } = extractionQueue.shift();
+            // The awaits below can take seconds; re-check the active chat afterwards.
+            const isStillActiveChat = () => getContext().chatId === chatId;

+            try {
+                const atoms = await extractAtomsForRound(userMessage, aiMessage, aiFloor, { timeout: 12000 });

+                if (!isStillActiveChat()) {
+                    xbLog.warn(MODULE_ID, `floor ${aiFloor}: chat 已切换，丢弃提取结果`);
+                    continue;
+                }

+                if (!atoms?.length) {
+                    setL0FloorStatus(aiFloor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
+                    xbLog.info(MODULE_ID, `floor ${aiFloor}: 无有效 atoms`);
+                    continue;
+                }

+                for (const atom of atoms) atom.chatId = chatId;
+                saveStateAtoms(atoms);
+                await vectorizeAtoms(chatId, atoms);

+                if (isStillActiveChat()) {
+                    setL0FloorStatus(aiFloor, { status: 'ok', atoms: atoms.length });
+                }

+                xbLog.info(MODULE_ID, `floor ${aiFloor}: ${atoms.length} atoms 已存储`);
+            } catch (e) {
+                xbLog.error(MODULE_ID, `floor ${aiFloor} 处理失败`, e);

+                if (isStillActiveChat()) {
+                    // Keep the stored reason short and single-line.
+                    setL0FloorStatus(aiFloor, {
+                        status: 'fail',
+                        attempts: (getL0FloorStatus(aiFloor)?.attempts || 0) + 1,
+                        reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120),
+                    });
+                }
+            }
+        }
+    } finally {
+        isProcessing = false;
+    }
+}
+
+// ============================================================================
 // 回滚钩子
-// ═══════════════════════════════════════════════════════════════════════════
+// ============================================================================

+/**
+ * Rollback hook, installed as globalThis.LWB_StateRollbackHook by
+ * initStateIntegration().
+ *
+ * Deletes stored state atoms and L0 index entries via the *FromFloor helpers
+ * and, when a chat id is available, the matching state vectors. The log line
+ * describes the affected range as `floor >= floor`.
+ *
+ * @param {number} floor - First floor to roll back.
+ * @returns {Promise<void>}
+ */
 async function handleStateRollback(floor) {
    xbLog.info(MODULE_ID, `收到回滚请求: floor >= ${floor}`);

    const { chatId } = getContext();

-    // 1. 删除 chat_metadata 中的 atoms
    deleteStateAtomsFromFloor(floor);
+    deleteL0IndexFromFloor(floor);

-    // 2. 删除 IndexedDB 中的 vectors
    if (chatId) {
        await deleteStateVectorsFromFloor(chatId, floor);
    }
 }

-// ═══════════════════════════════════════════════════════════════════════════
-// 重建向量（供"生成向量"按钮调用）
-// ═══════════════════════════════════════════════════════════════════════════
+// ============================================================================
+// 兼容旧接口
+// ============================================================================
+
+/**
+ * Legacy entry point: full re-extraction of L0 atoms for a chat.
+ *
+ * Clears stored atoms, the L0 floor index and the chat's state vectors, then
+ * hands over to incrementalExtractAtoms(), which sees every floor as pending.
+ *
+ * @param {string} chatId - Chat whose vectors are cleared and whose atoms are rebuilt.
+ * @param {Array<object>} chat - Chat messages to process.
+ * @param {Function} [onProgress] - Forwarded unchanged to incrementalExtractAtoms().
+ * @returns {Promise<{built: number}>} Result of incrementalExtractAtoms(), or
+ *   `{ built: 0 }` when inputs are missing or vectors are disabled.
+ */
+export async function batchExtractAndStoreAtoms(chatId, chat, onProgress) {
+    const hasInput = Boolean(chatId) && Boolean(chat?.length);
+    if (!hasInput) return { built: 0 };

+    if (!getVectorConfig()?.enabled) return { built: 0 };

+    xbLog.info(MODULE_ID, `开始批量 L0 提取: ${chat.length} 条消息`);

+    // Start from a clean slate so the incremental pass treats every floor as pending.
+    clearStateAtoms();
+    clearL0Index();
+    await clearStateVectors(chatId);

+    const result = await incrementalExtractAtoms(chatId, chat, onProgress);
+    return result;
+}

 export async function rebuildStateVectors(chatId, vectorCfg) {
    if (!chatId || !vectorCfg?.enabled) return { built: 0 };
@@ -118,36 +294,10 @@ export async function rebuildStateVectors(chatId, vectorCfg) {
    const atoms = getStateAtoms();
    if (!atoms.length) return { built: 0 };

-    xbLog.info(MODULE_ID, `开始重建 L0 向量: ${atoms.length} 个 atom`);
+    xbLog.info(MODULE_ID, `重建 L0 向量: ${atoms.length} 条 atom`);

-    // 清空旧向量
    await clearStateVectors(chatId);
+    await vectorizeAtoms(chatId, atoms);

-    // 重新向量化
-    const fingerprint = getEngineFingerprint(vectorCfg);
-    const batchSize = vectorCfg.engine === 'local' ? 5 : 25;
-    let built = 0;
-
-    for (let i = 0; i < atoms.length; i += batchSize) {
-        const batch = atoms.slice(i, i + batchSize);
-        const texts = batch.map(a => a.semantic);
-
-        try {
-            const vectors = await embed(texts, vectorCfg);
-
-            const items = batch.map((a, j) => ({
-                atomId: a.atomId,
-                floor: a.floor,
-                vector: vectors[j],
-            }));
-
-            await saveStateVectors(chatId, items, fingerprint);
-            built += items.length;
-        } catch (e) {
-            xbLog.error(MODULE_ID, `L0 向量化批次失败: ${i}-${i + batchSize}`, e);
-        }
-    }
-
-    xbLog.info(MODULE_ID, `L0 向量重建完成: ${built}/${atoms.length}`);
-    return { built };
+    return { built: atoms.length };
 }