Add L0 index and anchor UI updates
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Story Summary - Chunk Builder
|
||||
// 标准 RAG chunking: ~200 tokens per chunk
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
@@ -19,6 +19,7 @@ import {
|
||||
import { embed, getEngineFingerprint } from '../utils/embedder.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import { filterText } from '../utils/text-filter.js';
|
||||
import { extractAndStoreAtomsForRound } from './state-integration.js';
|
||||
|
||||
const MODULE_ID = 'chunk-builder';
|
||||
|
||||
@@ -201,8 +202,7 @@ export async function buildAllChunks(options = {}) {
|
||||
await saveChunks(chatId, allChunks);
|
||||
|
||||
const texts = allChunks.map(c => c.text);
|
||||
const isLocal = vectorConfig.engine === 'local';
|
||||
const batchSize = isLocal ? 5 : 20;
|
||||
const batchSize = 20;
|
||||
|
||||
let completed = 0;
|
||||
let errors = 0;
|
||||
@@ -302,6 +302,7 @@ export async function buildIncrementalChunks(options = {}) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// L1 同步(消息变化时调用)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
@@ -337,13 +338,6 @@ export async function syncOnMessageReceived(chatId, lastFloor, message, vectorCo
|
||||
if (!chatId || lastFloor < 0 || !message) return;
|
||||
if (!vectorConfig?.enabled) return;
|
||||
|
||||
// 本地模型未加载时跳过(避免意外触发下载或报错)
|
||||
if (vectorConfig.engine === "local") {
|
||||
const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("../utils/embedder.js");
|
||||
const modelId = vectorConfig.local?.modelId || DEFAULT_LOCAL_MODEL;
|
||||
if (!isLocalModelLoaded(modelId)) return;
|
||||
}
|
||||
|
||||
// 删除该楼层旧的
|
||||
await deleteChunksAtFloor(chatId, lastFloor);
|
||||
|
||||
@@ -367,4 +361,18 @@ export async function syncOnMessageReceived(chatId, lastFloor, message, vectorCo
|
||||
} catch (e) {
|
||||
xbLog.error(MODULE_ID, `消息同步失败:floor ${lastFloor}`, e);
|
||||
}
|
||||
// L0 配对提取(仅 AI 消息触发)
|
||||
if (!message.is_user) {
|
||||
const { chat } = getContext();
|
||||
const userFloor = lastFloor - 1;
|
||||
const userMessage = (userFloor >= 0 && chat[userFloor]?.is_user) ? chat[userFloor] : null;
|
||||
|
||||
try {
|
||||
await extractAndStoreAtomsForRound(lastFloor, message, userMessage);
|
||||
} catch (e) {
|
||||
xbLog.warn(MODULE_ID, `Atom 提取失败: floor ${lastFloor}`, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Story Summary - State Integration (L0)
|
||||
// 事件监听 + 回滚钩子注册
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// ============================================================================
|
||||
// state-integration.js - L0 记忆锚点管理
|
||||
// 支持增量提取、清空、取消
|
||||
// ============================================================================
|
||||
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
@@ -11,70 +11,174 @@ import {
|
||||
deleteStateAtomsFromFloor,
|
||||
deleteStateVectorsFromFloor,
|
||||
getStateAtoms,
|
||||
clearStateAtoms,
|
||||
clearStateVectors,
|
||||
getL0FloorStatus,
|
||||
setL0FloorStatus,
|
||||
clearL0Index,
|
||||
deleteL0IndexFromFloor,
|
||||
} from '../storage/state-store.js';
|
||||
import { embed, getEngineFingerprint } from '../utils/embedder.js';
|
||||
import { embed } from '../llm/siliconflow.js';
|
||||
import { extractAtomsForRound, cancelBatchExtraction } from '../llm/atom-extraction.js';
|
||||
import { getVectorConfig } from '../../data/config.js';
|
||||
import { getEngineFingerprint } from '../utils/embedder.js';
|
||||
import { filterText } from '../utils/text-filter.js';
|
||||
|
||||
const MODULE_ID = 'state-integration';
|
||||
|
||||
let initialized = false;
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
/**
 * Public entry point for cancelling L0 extraction.
 *
 * Forwards straight to `cancelBatchExtraction()` from the atom-extraction
 * module; takes no arguments and returns nothing.
 */
export function cancelL0Extraction() {
    cancelBatchExtraction();
}
|
||||
|
||||
// ============================================================================
|
||||
// 初始化
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// ============================================================================
|
||||
|
||||
/**
 * One-time setup of the L0 state-layer integration.
 *
 * Guarded by the module-level `initialized` flag, so every call after the
 * first is a no-op.
 */
export function initStateIntegration() {
    if (!initialized) {
        initialized = true;

        // Listen for the atoms-generated event dispatched on `document`.
        $(document).on('xiaobaix:variables:stateAtomsGenerated', handleStateAtomsGenerated);

        // Publish the rollback handler as a global hook.
        globalThis.LWB_StateRollbackHook = handleStateRollback;

        xbLog.info(MODULE_ID, 'L0 状态层集成已初始化');
    }
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 事件处理
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// ============================================================================
|
||||
// 统计
|
||||
// ============================================================================
|
||||
|
||||
async function handleStateAtomsGenerated(e, data) {
|
||||
const { atoms } = data || {};
|
||||
if (!atoms?.length) return;
|
||||
|
||||
const { chatId } = getContext();
|
||||
if (!chatId) return;
|
||||
|
||||
const validAtoms = atoms.filter(a => a?.chatId === chatId);
|
||||
if (!validAtoms.length) {
|
||||
xbLog.warn(MODULE_ID, `atoms.chatId 不匹配,期望 ${chatId},跳过`);
|
||||
return;
|
||||
export async function getAnchorStats() {
|
||||
const { chat } = getContext();
|
||||
if (!chat?.length) {
|
||||
return { extracted: 0, total: 0, pending: 0, empty: 0, fail: 0 };
|
||||
}
|
||||
|
||||
xbLog.info(MODULE_ID, `收到 ${validAtoms.length} 个 StateAtom`);
|
||||
|
||||
// 1. 存入 chat_metadata(持久化)
|
||||
saveStateAtoms(validAtoms);
|
||||
|
||||
// 2. 向量化并存入 IndexedDB
|
||||
const vectorCfg = getVectorConfig();
|
||||
if (!vectorCfg?.enabled) {
|
||||
xbLog.info(MODULE_ID, '向量未启用,跳过 L0 向量化');
|
||||
return;
|
||||
const aiFloors = [];
|
||||
for (let i = 0; i < chat.length; i++) {
|
||||
if (!chat[i]?.is_user) aiFloors.push(i);
|
||||
}
|
||||
|
||||
await vectorizeAtoms(chatId, validAtoms, vectorCfg);
|
||||
let ok = 0;
|
||||
let empty = 0;
|
||||
let fail = 0;
|
||||
|
||||
for (const f of aiFloors) {
|
||||
const s = getL0FloorStatus(f);
|
||||
if (!s) continue;
|
||||
if (s.status === 'ok') ok++;
|
||||
else if (s.status === 'empty') empty++;
|
||||
else if (s.status === 'fail') fail++;
|
||||
}
|
||||
|
||||
const total = aiFloors.length;
|
||||
const completed = ok + empty;
|
||||
const pending = Math.max(0, total - completed);
|
||||
|
||||
return { extracted: completed, total, pending, empty, fail };
|
||||
}
|
||||
|
||||
async function vectorizeAtoms(chatId, atoms, vectorCfg) {
|
||||
// ============================================================================
|
||||
// 增量提取
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Build the text handed to L0 atom extraction for one user/AI round.
 *
 * Each message contributes a section of the form `【<role>:<name>】\n<text>`,
 * where `<text>` is the message body after `filterText()` and trimming.
 * Sections are joined with a `---` separator line.
 *
 * A section is emitted only when its *filtered* text is non-empty. Checking
 * the raw text instead would produce header-only sections for messages whose
 * whole content is filtered out, and callers that treat an empty result as
 * "nothing to extract" would never see an empty string for such rounds.
 *
 * @param {{name?: string, mes?: string}|null|undefined} userMessage - User message of the round, if any.
 * @param {{name?: string, mes?: string}|null|undefined} aiMessage - AI message of the round.
 * @returns {string} Combined input text, or `''` when neither message has content left after filtering.
 */
function buildL0InputText(userMessage, aiMessage) {
    // The role label doubles as the fallback display name.
    const sections = [
        ['用户', userMessage],
        ['角色', aiMessage],
    ];

    const parts = [];
    for (const [label, message] of sections) {
        const raw = message?.mes;
        // Non-string bodies are treated as "no content" rather than throwing.
        if (typeof raw !== 'string' || !raw.trim()) continue;

        const body = filterText(raw).trim();
        if (!body) continue; // everything was filtered away: no header-only section

        parts.push(`【${label}:${message.name || label}】\n${body}`);
    }

    return parts.join('\n\n---\n\n').trim();
}
|
||||
|
||||
/**
 * Extract L0 atoms for every AI floor that is not yet marked `ok` or `empty`
 * in the L0 floor index.
 *
 * For each pending floor the AI message is paired with the directly preceding
 * user message (if there is one), sent through `extractAtomsForRound`, and the
 * outcome is recorded with `setL0FloorStatus`:
 *   - `empty` (`filtered_empty`) when the round has no input text,
 *   - `empty` (`llm_empty`) when extraction returns no atoms,
 *   - `ok` when atoms were saved and handed to `vectorizeAtoms`,
 *   - `fail` (with an incremented `attempts` counter) when extraction throws.
 *
 * @param {string} chatId - Chat the atoms belong to; stamped onto every atom.
 * @param {Array<object>} chat - Chat messages; the array index is the floor number.
 * @param {(message: string, current: number, total: number) => void} [onProgress]
 *        Progress callback, always invoked as `(message, current, total)`.
 * @returns {Promise<{built: number}>} Number of atoms stored during this run.
 */
export async function incrementalExtractAtoms(chatId, chat, onProgress) {
    if (!chatId || !chat?.length) return { built: 0 };

    const vectorCfg = getVectorConfig();
    if (!vectorCfg?.enabled) return { built: 0 };

    // Pass 1: collect the AI floors that still need extraction.
    const pendingPairs = [];
    for (let i = 0; i < chat.length; i++) {
        const msg = chat[i];
        if (!msg || msg.is_user) continue;

        const st = getL0FloorStatus(i);
        if (st?.status === 'ok' || st?.status === 'empty') continue;

        const userMsg = (i > 0 && chat[i - 1]?.is_user) ? chat[i - 1] : null;

        if (!buildL0InputText(userMsg, msg)) {
            setL0FloorStatus(i, { status: 'empty', reason: 'filtered_empty', atoms: 0 });
            continue;
        }

        pendingPairs.push({ userMsg, aiMsg: msg, aiFloor: i });
    }

    if (!pendingPairs.length) {
        // Same (message, current, total) order as the per-floor progress call below.
        onProgress?.('已全部提取', 0, 0);
        return { built: 0 };
    }

    xbLog.info(MODULE_ID, `增量 L0 提取:pending=${pendingPairs.length}`);

    const total = pendingPairs.length;
    let completed = 0;
    let builtAtoms = 0;

    // Pass 2: extract sequentially, one floor at a time.
    for (const { userMsg, aiMsg, aiFloor: floor } of pendingPairs) {
        const prev = getL0FloorStatus(floor);

        try {
            const atoms = await extractAtomsForRound(userMsg, aiMsg, floor, { timeout: 20000 });

            if (!atoms?.length) {
                setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
            } else {
                for (const atom of atoms) atom.chatId = chatId;
                saveStateAtoms(atoms);
                await vectorizeAtoms(chatId, atoms);

                setL0FloorStatus(floor, { status: 'ok', atoms: atoms.length });
                builtAtoms += atoms.length;
            }
        } catch (e) {
            // Record the failure so the floor is retried next run, and log it
            // so the error is not swallowed silently.
            xbLog.warn(MODULE_ID, `L0 提取失败: floor ${floor}`, e);
            setL0FloorStatus(floor, {
                status: 'fail',
                attempts: (prev?.attempts || 0) + 1,
                reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120),
            });
        } finally {
            completed++;
            onProgress?.(`L0: ${completed}/${total}`, completed, total);
        }
    }

    xbLog.info(MODULE_ID, `增量 L0 完成:atoms=${builtAtoms}, floors=${pendingPairs.length}`);
    return { built: builtAtoms };
}
|
||||
|
||||
async function vectorizeAtoms(chatId, atoms) {
|
||||
if (!atoms?.length) return;
|
||||
|
||||
const vectorCfg = getVectorConfig();
|
||||
if (!vectorCfg?.enabled) return;
|
||||
|
||||
const texts = atoms.map(a => a.semantic);
|
||||
const fingerprint = getEngineFingerprint(vectorCfg);
|
||||
|
||||
try {
|
||||
const vectors = await embed(texts, vectorCfg);
|
||||
const vectors = await embed(texts, { timeout: 30000 });
|
||||
|
||||
const items = atoms.map((a, i) => ({
|
||||
atomId: a.atomId,
|
||||
@@ -83,34 +187,106 @@ async function vectorizeAtoms(chatId, atoms, vectorCfg) {
|
||||
}));
|
||||
|
||||
await saveStateVectors(chatId, items, fingerprint);
|
||||
xbLog.info(MODULE_ID, `L0 向量化完成: ${items.length} 个`);
|
||||
xbLog.info(MODULE_ID, `L0 向量化完成: ${items.length} 条`);
|
||||
} catch (e) {
|
||||
xbLog.error(MODULE_ID, 'L0 向量化失败', e);
|
||||
// 不阻塞,向量可后续通过"生成向量"重建
|
||||
}
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// ============================================================================
|
||||
// 清空
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Wipe all L0 anchor data.
 *
 * Always clears the stored state atoms and the L0 floor index; the stored
 * vectors are cleared only when a `chatId` is supplied.
 *
 * @param {string} [chatId] - Chat whose state vectors should be removed.
 * @returns {Promise<void>}
 */
export async function clearAllAtomsAndVectors(chatId) {
    clearStateAtoms();
    clearL0Index();

    const hasChat = Boolean(chatId);
    if (hasChat) {
        await clearStateVectors(chatId);
    }

    xbLog.info(MODULE_ID, '已清空所有记忆锚点');
}
|
||||
|
||||
// ============================================================================
|
||||
// 实时增量(AI 消息后触发)- 保留原有逻辑
|
||||
// ============================================================================
|
||||
|
||||
// Pending real-time extraction jobs, consumed front-to-back by processQueue().
// Each entry is { aiFloor, aiMessage, userMessage, chatId }.
let extractionQueue = [];

// True while processQueue() is draining the queue; prevents a second drain
// loop from starting in parallel.
let isProcessing = false;
|
||||
|
||||
/**
 * Queue real-time L0 extraction for one round (called after an AI message).
 *
 * The job is appended to `extractionQueue` together with the chat id that is
 * active right now, and the queue drain is kicked off. The drain is
 * intentionally not awaited, so this function resolves as soon as the job is
 * queued.
 *
 * Does nothing when there is no active chat or vectors are disabled.
 *
 * @param {number} aiFloor - Floor (chat index) of the AI message.
 * @param {object} aiMessage - The AI message.
 * @param {object|null} userMessage - The preceding user message, if any.
 * @returns {Promise<void>}
 */
export async function extractAndStoreAtomsForRound(aiFloor, aiMessage, userMessage) {
    const { chatId } = getContext();
    if (!chatId) return;

    if (!getVectorConfig()?.enabled) return;

    extractionQueue.push({ aiFloor, aiMessage, userMessage, chatId });

    // Fire-and-forget, but never leave the promise without a rejection
    // handler: an unexpected throw would otherwise surface as an unhandled
    // rejection instead of a log entry.
    processQueue().catch((e) => {
        xbLog.error(MODULE_ID, 'L0 队列处理异常', e);
    });
}
|
||||
|
||||
/**
 * Drain `extractionQueue` one job at a time.
 *
 * Re-entrancy is blocked by the module-level `isProcessing` flag, which is
 * reset in a `finally` so the queue can never get stuck in the "processing"
 * state after an unexpected throw.
 *
 * For every job:
 *   - jobs queued for a chat that is no longer the active one are dropped,
 *     because `saveStateAtoms` / `setL0FloorStatus` take no chat id and would
 *     otherwise write into whichever chat is active now;
 *   - atoms are extracted, stamped with the job's chat id, saved and handed
 *     to `vectorizeAtoms`;
 *   - on success the floor is marked `ok` in the L0 index, mirroring
 *     `incrementalExtractAtoms`, so a later incremental run does not extract
 *     the same floor again. Empty results and failures are left unmarked and
 *     remain eligible for the incremental pass.
 *
 * @returns {Promise<void>}
 */
async function processQueue() {
    if (isProcessing || extractionQueue.length === 0) return;
    isProcessing = true;

    try {
        while (extractionQueue.length > 0) {
            const { aiFloor, aiMessage, userMessage, chatId } = extractionQueue.shift();

            try {
                // Skip the LLM call entirely when the chat already changed.
                if (getContext().chatId !== chatId) {
                    xbLog.info(MODULE_ID, `floor ${aiFloor}: 聊天已切换,跳过`);
                    continue;
                }

                const atoms = await extractAtomsForRound(userMessage, aiMessage, aiFloor, { timeout: 12000 });

                if (!atoms?.length) {
                    xbLog.info(MODULE_ID, `floor ${aiFloor}: 无有效 atoms`);
                    continue;
                }

                // The chat may have been switched while the extraction was in flight.
                if (getContext().chatId !== chatId) {
                    xbLog.info(MODULE_ID, `floor ${aiFloor}: 聊天已切换,跳过`);
                    continue;
                }

                for (const atom of atoms) atom.chatId = chatId;
                saveStateAtoms(atoms);
                await vectorizeAtoms(chatId, atoms);
                setL0FloorStatus(aiFloor, { status: 'ok', atoms: atoms.length });

                xbLog.info(MODULE_ID, `floor ${aiFloor}: ${atoms.length} atoms 已存储`);
            } catch (e) {
                xbLog.error(MODULE_ID, `floor ${aiFloor} 处理失败`, e);
            }
        }
    } finally {
        isProcessing = false;
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// 回滚钩子
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Rollback hook: discard all L0 data at or above the given floor.
 *
 * Removes the stored atoms and the L0 index entries from `floor` onward and,
 * when a chat is active, the matching state vectors as well.
 *
 * @param {number} floor - First floor to roll back (inclusive).
 * @returns {Promise<void>}
 */
async function handleStateRollback(floor) {
    xbLog.info(MODULE_ID, `收到回滚请求: floor >= ${floor}`);

    const activeChatId = getContext().chatId;

    // Chat-metadata side: atoms and per-floor index.
    deleteStateAtomsFromFloor(floor);
    deleteL0IndexFromFloor(floor);

    // Vector store side, keyed by chat id.
    if (!activeChatId) return;
    await deleteStateVectorsFromFloor(activeChatId, floor);
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 重建向量(供"生成向量"按钮调用)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// ============================================================================
|
||||
// 兼容旧接口
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Legacy-compatible full rebuild of L0 data.
 *
 * Clears the stored atoms, the L0 floor index and the chat's state vectors,
 * then delegates to `incrementalExtractAtoms`, which at that point sees every
 * AI floor as pending.
 *
 * @param {string} chatId - Chat to rebuild.
 * @param {Array<object>} chat - Chat messages.
 * @param {Function} [onProgress] - Passed through to `incrementalExtractAtoms`.
 * @returns {Promise<{built: number}>} Result of the incremental run, or
 *          `{ built: 0 }` when input is missing or vectors are disabled.
 */
export async function batchExtractAndStoreAtoms(chatId, chat, onProgress) {
    const hasInput = Boolean(chatId) && Boolean(chat?.length);
    if (!hasInput) {
        return { built: 0 };
    }

    const cfg = getVectorConfig();
    if (!cfg?.enabled) {
        return { built: 0 };
    }

    xbLog.info(MODULE_ID, `开始批量 L0 提取: ${chat.length} 条消息`);

    // Start from a clean slate before re-extracting everything.
    clearStateAtoms();
    clearL0Index();
    await clearStateVectors(chatId);

    const result = await incrementalExtractAtoms(chatId, chat, onProgress);
    return result;
}
|
||||
|
||||
export async function rebuildStateVectors(chatId, vectorCfg) {
|
||||
if (!chatId || !vectorCfg?.enabled) return { built: 0 };
|
||||
@@ -118,36 +294,10 @@ export async function rebuildStateVectors(chatId, vectorCfg) {
|
||||
const atoms = getStateAtoms();
|
||||
if (!atoms.length) return { built: 0 };
|
||||
|
||||
xbLog.info(MODULE_ID, `开始重建 L0 向量: ${atoms.length} 个 atom`);
|
||||
xbLog.info(MODULE_ID, `重建 L0 向量: ${atoms.length} 条 atom`);
|
||||
|
||||
// 清空旧向量
|
||||
await clearStateVectors(chatId);
|
||||
await vectorizeAtoms(chatId, atoms);
|
||||
|
||||
// 重新向量化
|
||||
const fingerprint = getEngineFingerprint(vectorCfg);
|
||||
const batchSize = vectorCfg.engine === 'local' ? 5 : 25;
|
||||
let built = 0;
|
||||
|
||||
for (let i = 0; i < atoms.length; i += batchSize) {
|
||||
const batch = atoms.slice(i, i + batchSize);
|
||||
const texts = batch.map(a => a.semantic);
|
||||
|
||||
try {
|
||||
const vectors = await embed(texts, vectorCfg);
|
||||
|
||||
const items = batch.map((a, j) => ({
|
||||
atomId: a.atomId,
|
||||
floor: a.floor,
|
||||
vector: vectors[j],
|
||||
}));
|
||||
|
||||
await saveStateVectors(chatId, items, fingerprint);
|
||||
built += items.length;
|
||||
} catch (e) {
|
||||
xbLog.error(MODULE_ID, `L0 向量化批次失败: ${i}-${i + batchSize}`, e);
|
||||
}
|
||||
}
|
||||
|
||||
xbLog.info(MODULE_ID, `L0 向量重建完成: ${built}/${atoms.length}`);
|
||||
return { built };
|
||||
return { built: atoms.length };
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user