Upload LittleWhiteBox extension

This commit is contained in:
RT15548
2026-02-16 17:11:25 +08:00
commit 14276b51b7
126 changed files with 87499 additions and 0 deletions

View File

@@ -0,0 +1,391 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Chunk Builder
// 标准 RAG chunking: ~200 tokens per chunk
// ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../../extensions.js';
import {
getMeta,
updateMeta,
saveChunks,
saveChunkVectors,
clearAllChunks,
deleteChunksFromFloor,
deleteChunksAtFloor,
makeChunkId,
hashText,
CHUNK_MAX_TOKENS,
} from '../storage/chunk-store.js';
import { embed, getEngineFingerprint } from '../utils/embedder.js';
import { xbLog } from '../../../../core/debug-core.js';
import { filterText } from '../utils/text-filter.js';
import { extractAndStoreAtomsForRound } from './state-integration.js';
import {
deleteStateAtomsFromFloor,
deleteStateVectorsFromFloor,
deleteL0IndexFromFloor,
} from '../storage/state-store.js';
const MODULE_ID = 'chunk-builder';
// ═══════════════════════════════════════════════════════════════════════════
// Token 估算
// ═══════════════════════════════════════════════════════════════════════════
function estimateTokens(text) {
if (!text) return 0;
const chinese = (text.match(/[\u4e00-\u9fff]/g) || []).length;
const other = text.length - chinese;
return Math.ceil(chinese + other / 4);
}
function splitSentences(text) {
if (!text) return [];
const parts = text.split(/(?<=[。!?\n])|(?<=[.!?]\s)/);
return parts.map(s => s.trim()).filter(s => s.length > 0);
}
// ═══════════════════════════════════════════════════════════════════════════
// Chunk 切分
// ═══════════════════════════════════════════════════════════════════════════
export function chunkMessage(floor, message, maxTokens = CHUNK_MAX_TOKENS) {
const text = message.mes || '';
const speaker = message.name || (message.is_user ? '用户' : '角色');
const isUser = !!message.is_user;
// 1. 应用用户自定义过滤规则
// 2. 移除 TTS 标记(硬编码)
// 3. 移除 <state> 标签硬编码L0 已单独存储)
const cleanText = filterText(text)
.replace(/\[tts:[^\]]*\]/gi, '')
.replace(/<state>[\s\S]*?<\/state>/gi, '')
.trim();
if (!cleanText) return [];
const totalTokens = estimateTokens(cleanText);
if (totalTokens <= maxTokens) {
return [{
chunkId: makeChunkId(floor, 0),
floor,
chunkIdx: 0,
speaker,
isUser,
text: cleanText,
textHash: hashText(cleanText),
}];
}
const sentences = splitSentences(cleanText);
const chunks = [];
let currentSentences = [];
let currentTokens = 0;
for (const sent of sentences) {
const sentTokens = estimateTokens(sent);
if (sentTokens > maxTokens) {
if (currentSentences.length > 0) {
const chunkText = currentSentences.join('');
chunks.push({
chunkId: makeChunkId(floor, chunks.length),
floor,
chunkIdx: chunks.length,
speaker,
isUser,
text: chunkText,
textHash: hashText(chunkText),
});
currentSentences = [];
currentTokens = 0;
}
const sliceSize = maxTokens * 2;
for (let i = 0; i < sent.length; i += sliceSize) {
const slice = sent.slice(i, i + sliceSize);
chunks.push({
chunkId: makeChunkId(floor, chunks.length),
floor,
chunkIdx: chunks.length,
speaker,
isUser,
text: slice,
textHash: hashText(slice),
});
}
continue;
}
if (currentTokens + sentTokens > maxTokens && currentSentences.length > 0) {
const chunkText = currentSentences.join('');
chunks.push({
chunkId: makeChunkId(floor, chunks.length),
floor,
chunkIdx: chunks.length,
speaker,
isUser,
text: chunkText,
textHash: hashText(chunkText),
});
currentSentences = [];
currentTokens = 0;
}
currentSentences.push(sent);
currentTokens += sentTokens;
}
if (currentSentences.length > 0) {
const chunkText = currentSentences.join('');
chunks.push({
chunkId: makeChunkId(floor, chunks.length),
floor,
chunkIdx: chunks.length,
speaker,
isUser,
text: chunkText,
textHash: hashText(chunkText),
});
}
return chunks;
}
// ═══════════════════════════════════════════════════════════════════════════
// 构建状态
// ═══════════════════════════════════════════════════════════════════════════
export async function getChunkBuildStatus() {
const { chat, chatId } = getContext();
if (!chatId) {
return { totalFloors: 0, builtFloors: 0, pending: 0 };
}
const meta = await getMeta(chatId);
const totalFloors = chat?.length || 0;
const builtFloors = meta.lastChunkFloor + 1;
return {
totalFloors,
builtFloors,
lastChunkFloor: meta.lastChunkFloor,
pending: Math.max(0, totalFloors - builtFloors),
};
}
// ═══════════════════════════════════════════════════════════════════════════
// 全量构建
// ═══════════════════════════════════════════════════════════════════════════
export async function buildAllChunks(options = {}) {
const { onProgress, shouldCancel, vectorConfig } = options;
const { chat, chatId } = getContext();
if (!chatId || !chat?.length) {
return { built: 0, errors: 0 };
}
const fingerprint = getEngineFingerprint(vectorConfig);
await clearAllChunks(chatId);
await updateMeta(chatId, { lastChunkFloor: -1, fingerprint });
const allChunks = [];
for (let floor = 0; floor < chat.length; floor++) {
const chunks = chunkMessage(floor, chat[floor]);
allChunks.push(...chunks);
}
if (allChunks.length === 0) {
return { built: 0, errors: 0 };
}
xbLog.info(MODULE_ID, `开始构建 ${allChunks.length} 个 chunks${chat.length} 层楼)`);
await saveChunks(chatId, allChunks);
const texts = allChunks.map(c => c.text);
const batchSize = 20;
let completed = 0;
let errors = 0;
const allVectors = [];
for (let i = 0; i < texts.length; i += batchSize) {
if (shouldCancel?.()) break;
const batch = texts.slice(i, i + batchSize);
try {
const vectors = await embed(batch, vectorConfig);
allVectors.push(...vectors);
completed += batch.length;
onProgress?.(completed, texts.length);
} catch (e) {
xbLog.error(MODULE_ID, `批次 ${i}/${texts.length} 向量化失败`, e);
allVectors.push(...batch.map(() => null));
errors++;
}
}
if (shouldCancel?.()) {
return { built: completed, errors };
}
const vectorItems = allChunks
.map((chunk, idx) => allVectors[idx] ? { chunkId: chunk.chunkId, vector: allVectors[idx] } : null)
.filter(Boolean);
if (vectorItems.length > 0) {
await saveChunkVectors(chatId, vectorItems, fingerprint);
}
await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });
xbLog.info(MODULE_ID, `构建完成:${vectorItems.length} 个向量,${errors} 个错误`);
return { built: vectorItems.length, errors };
}
// ═══════════════════════════════════════════════════════════════════════════
// 增量构建
// ═══════════════════════════════════════════════════════════════════════════
export async function buildIncrementalChunks(options = {}) {
const { vectorConfig } = options;
const { chat, chatId } = getContext();
if (!chatId || !chat?.length) {
return { built: 0 };
}
const meta = await getMeta(chatId);
const fingerprint = getEngineFingerprint(vectorConfig);
if (meta.fingerprint && meta.fingerprint !== fingerprint) {
xbLog.warn(MODULE_ID, '引擎指纹不匹配,跳过增量构建');
return { built: 0 };
}
const startFloor = meta.lastChunkFloor + 1;
if (startFloor >= chat.length) {
return { built: 0 };
}
xbLog.info(MODULE_ID, `增量构建 ${startFloor} - ${chat.length - 1}`);
const newChunks = [];
for (let floor = startFloor; floor < chat.length; floor++) {
const chunks = chunkMessage(floor, chat[floor]);
newChunks.push(...chunks);
}
if (newChunks.length === 0) {
await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });
return { built: 0 };
}
await saveChunks(chatId, newChunks);
const texts = newChunks.map(c => c.text);
try {
const vectors = await embed(texts, vectorConfig);
const vectorItems = newChunks.map((chunk, idx) => ({
chunkId: chunk.chunkId,
vector: vectors[idx],
}));
await saveChunkVectors(chatId, vectorItems, fingerprint);
await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });
return { built: vectorItems.length };
} catch (e) {
xbLog.error(MODULE_ID, '增量向量化失败', e);
return { built: 0 };
}
}
// ═══════════════════════════════════════════════════════════════════════════
// L1 同步(消息变化时调用)
// ═══════════════════════════════════════════════════════════════════════════
/**
* 消息删除后同步:删除 floor >= newLength 的 chunk
*/
export async function syncOnMessageDeleted(chatId, newLength) {
if (!chatId || newLength < 0) return;
await deleteChunksFromFloor(chatId, newLength);
await updateMeta(chatId, { lastChunkFloor: newLength - 1 });
xbLog.info(MODULE_ID, `消息删除同步:删除 floor >= ${newLength}`);
}
/**
* swipe 后同步:删除最后楼层的 chunk等待后续重建
*/
export async function syncOnMessageSwiped(chatId, lastFloor) {
if (!chatId || lastFloor < 0) return;
await deleteChunksAtFloor(chatId, lastFloor);
await updateMeta(chatId, { lastChunkFloor: lastFloor - 1 });
xbLog.info(MODULE_ID, `swipe 同步:删除 floor ${lastFloor}`);
}
/**
* 新消息后同步:删除 + 重建最后楼层
*/
export async function syncOnMessageReceived(chatId, lastFloor, message, vectorConfig, onL0Complete) {
if (!chatId || lastFloor < 0 || !message) return { built: 0, chunks: [] };
if (!vectorConfig?.enabled) return { built: 0, chunks: [] };
// 删除该楼层旧的
await deleteChunksAtFloor(chatId, lastFloor);
// 重建
const chunks = chunkMessage(lastFloor, message);
if (chunks.length === 0) return { built: 0, chunks: [] };
await saveChunks(chatId, chunks);
// 向量化
const fingerprint = getEngineFingerprint(vectorConfig);
const texts = chunks.map(c => c.text);
let vectorized = false;
try {
const vectors = await embed(texts, vectorConfig);
const items = chunks.map((c, i) => ({ chunkId: c.chunkId, vector: vectors[i] }));
await saveChunkVectors(chatId, items, fingerprint);
await updateMeta(chatId, { lastChunkFloor: lastFloor });
vectorized = true;
xbLog.info(MODULE_ID, `消息同步:重建 floor ${lastFloor}${chunks.length} 个 chunk`);
} catch (e) {
xbLog.error(MODULE_ID, `消息同步失败floor ${lastFloor}`, e);
}
// L0 配对提取(仅 AI 消息触发)
if (!message.is_user) {
const { chat } = getContext();
const userFloor = lastFloor - 1;
const userMessage = (userFloor >= 0 && chat[userFloor]?.is_user) ? chat[userFloor] : null;
// L0 先删后建(与 L1 deleteChunksAtFloor 对称)
// regenerate / swipe 后新消息覆盖旧楼时,清理旧 atoms
deleteStateAtomsFromFloor(lastFloor);
deleteL0IndexFromFloor(lastFloor);
await deleteStateVectorsFromFloor(chatId, lastFloor);
try {
await extractAndStoreAtomsForRound(lastFloor, message, userMessage, onL0Complete);
} catch (e) {
xbLog.warn(MODULE_ID, `Atom 提取失败: floor ${lastFloor}`, e);
}
}
return { built: vectorized ? chunks.length : 0, chunks };
}

View File

@@ -0,0 +1,562 @@
// ============================================================================
// state-integration.js - L0 状态层集成
// Phase 1: 批量 LLM 提取(只存文本)
// Phase 2: 统一向量化(提取完成后)
// ============================================================================
import { getContext } from '../../../../../../../extensions.js';
import { saveMetadataDebounced } from '../../../../../../../extensions.js';
import { xbLog } from '../../../../core/debug-core.js';
import {
saveStateAtoms,
saveStateVectors,
deleteStateAtomsFromFloor,
deleteStateVectorsFromFloor,
getStateAtoms,
clearStateAtoms,
clearStateVectors,
getL0FloorStatus,
setL0FloorStatus,
clearL0Index,
deleteL0IndexFromFloor,
} from '../storage/state-store.js';
import { embed } from '../llm/siliconflow.js';
import { extractAtomsForRound, cancelBatchExtraction } from '../llm/atom-extraction.js';
import { getVectorConfig } from '../../data/config.js';
import { getEngineFingerprint } from '../utils/embedder.js';
import { filterText } from '../utils/text-filter.js';
const MODULE_ID = 'state-integration';
// ★ 并发配置
const CONCURRENCY = 50;
const STAGGER_DELAY = 15;
const DEBUG_CONCURRENCY = true;
const R_AGG_MAX_CHARS = 256;
let initialized = false;
let extractionCancelled = false;
export function cancelL0Extraction() {
extractionCancelled = true;
cancelBatchExtraction();
}
// ============================================================================
// 初始化
// ============================================================================
export function initStateIntegration() {
if (initialized) return;
initialized = true;
globalThis.LWB_StateRollbackHook = handleStateRollback;
xbLog.info(MODULE_ID, 'L0 状态层集成已初始化');
}
// ============================================================================
// 统计
// ============================================================================
export async function getAnchorStats() {
const { chat } = getContext();
if (!chat?.length) {
return { extracted: 0, total: 0, pending: 0, empty: 0, fail: 0 };
}
// 统计 AI 楼层
const aiFloors = [];
for (let i = 0; i < chat.length; i++) {
if (!chat[i]?.is_user) aiFloors.push(i);
}
let ok = 0;
let empty = 0;
let fail = 0;
for (const f of aiFloors) {
const s = getL0FloorStatus(f);
if (!s) continue;
if (s.status === 'ok') ok++;
else if (s.status === 'empty') empty++;
else if (s.status === 'fail') fail++;
}
const total = aiFloors.length;
const processed = ok + empty + fail;
const pending = Math.max(0, total - processed);
return {
extracted: ok + empty,
total,
pending,
empty,
fail
};
}
// ============================================================================
// 增量提取 - Phase 1 提取文本Phase 2 统一向量化
// ============================================================================
function buildL0InputText(userMessage, aiMessage) {
const parts = [];
const userName = userMessage?.name || '用户';
const aiName = aiMessage?.name || '角色';
if (userMessage?.mes?.trim()) {
parts.push(`【用户:${userName}\n${filterText(userMessage.mes).trim()}`);
}
if (aiMessage?.mes?.trim()) {
parts.push(`【角色:${aiName}\n${filterText(aiMessage.mes).trim()}`);
}
return parts.join('\n\n---\n\n').trim();
}
function buildRAggregateText(atom) {
const uniq = new Set();
for (const edge of (atom?.edges || [])) {
const r = String(edge?.r || '').trim();
if (!r) continue;
uniq.add(r);
}
const joined = [...uniq].join(' ; ');
if (!joined) return String(atom?.semantic || '').trim();
return joined.length > R_AGG_MAX_CHARS ? joined.slice(0, R_AGG_MAX_CHARS) : joined;
}
export async function incrementalExtractAtoms(chatId, chat, onProgress, options = {}) {
const { maxFloors = Infinity } = options;
if (!chatId || !chat?.length) return { built: 0 };
const vectorCfg = getVectorConfig();
if (!vectorCfg?.enabled) return { built: 0 };
// ★ 重置取消标志
extractionCancelled = false;
const pendingPairs = [];
for (let i = 0; i < chat.length; i++) {
const msg = chat[i];
if (!msg || msg.is_user) continue;
const st = getL0FloorStatus(i);
// ★ 只跳过 ok 和 emptyfail 的可以重试
if (st?.status === 'ok' || st?.status === 'empty') {
continue;
}
const userMsg = (i > 0 && chat[i - 1]?.is_user) ? chat[i - 1] : null;
const inputText = buildL0InputText(userMsg, msg);
if (!inputText) {
setL0FloorStatus(i, { status: 'empty', reason: 'filtered_empty', atoms: 0 });
continue;
}
pendingPairs.push({ userMsg, aiMsg: msg, aiFloor: i });
}
// 限制单次提取楼层数(自动触发时使用)
if (pendingPairs.length > maxFloors) {
pendingPairs.length = maxFloors;
}
if (!pendingPairs.length) {
onProgress?.('已全部提取', 0, 0);
return { built: 0 };
}
xbLog.info(MODULE_ID, `增量 L0 提取pending=${pendingPairs.length}, concurrency=${CONCURRENCY}`);
let completed = 0;
let failed = 0;
const total = pendingPairs.length;
let builtAtoms = 0;
let active = 0;
let peakActive = 0;
const tStart = performance.now();
// ★ Phase 1: 收集所有新提取的 atoms不向量化
const allNewAtoms = [];
// ★ 限流检测:连续失败 N 次后暂停并降速
let consecutiveFailures = 0;
let rateLimited = false;
const RATE_LIMIT_THRESHOLD = 3; // 连续失败多少次触发限流保护
const RATE_LIMIT_WAIT_MS = 60000; // 限流后等待时间60 秒)
const RETRY_INTERVAL_MS = 1000; // 降速模式下每次请求间隔1 秒)
const RETRY_CONCURRENCY = 1; // ★ 降速模式下的并发数默认1建议不要超过5
// ★ 通用处理单个 pair 的逻辑(复用于正常模式和降速模式)
const processPair = async (pair, idx, workerId) => {
const floor = pair.aiFloor;
const prev = getL0FloorStatus(floor);
active++;
if (active > peakActive) peakActive = active;
if (DEBUG_CONCURRENCY && (idx % 10 === 0)) {
xbLog.info(MODULE_ID, `L0 pool start idx=${idx} active=${active} peak=${peakActive} worker=${workerId}`);
}
try {
const atoms = await extractAtomsForRound(pair.userMsg, pair.aiMsg, floor, { timeout: 20000 });
if (extractionCancelled) return;
if (atoms == null) {
throw new Error('llm_failed');
}
// ★ 成功:重置连续失败计数
consecutiveFailures = 0;
if (!atoms.length) {
setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
} else {
atoms.forEach(a => a.chatId = chatId);
saveStateAtoms(atoms);
allNewAtoms.push(...atoms);
setL0FloorStatus(floor, { status: 'ok', atoms: atoms.length });
builtAtoms += atoms.length;
}
} catch (e) {
if (extractionCancelled) return;
setL0FloorStatus(floor, {
status: 'fail',
attempts: (prev?.attempts || 0) + 1,
reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120),
});
failed++;
// ★ 限流检测:连续失败累加
consecutiveFailures++;
if (consecutiveFailures >= RATE_LIMIT_THRESHOLD && !rateLimited) {
rateLimited = true;
xbLog.warn(MODULE_ID, `连续失败 ${consecutiveFailures} 次,疑似触发 API 限流,将暂停所有并发`);
}
} finally {
active--;
if (!extractionCancelled) {
completed++;
onProgress?.(`提取: ${completed}/${total}`, completed, total);
}
if (DEBUG_CONCURRENCY && (completed % 25 === 0 || completed === total)) {
const elapsed = Math.max(1, Math.round(performance.now() - tStart));
xbLog.info(MODULE_ID, `L0 pool progress=${completed}/${total} active=${active} peak=${peakActive} elapsedMs=${elapsed}`);
}
}
};
// ★ 并发池处理(保持固定并发度)
const poolSize = Math.min(CONCURRENCY, pendingPairs.length);
let nextIndex = 0;
let started = 0;
const runWorker = async (workerId) => {
while (true) {
if (extractionCancelled || rateLimited) return;
const idx = nextIndex++;
if (idx >= pendingPairs.length) return;
const pair = pendingPairs[idx];
const stagger = started++;
if (STAGGER_DELAY > 0) {
await new Promise(r => setTimeout(r, stagger * STAGGER_DELAY));
}
if (extractionCancelled || rateLimited) return;
await processPair(pair, idx, workerId);
}
};
await Promise.all(Array.from({ length: poolSize }, (_, i) => runWorker(i)));
if (DEBUG_CONCURRENCY) {
const elapsed = Math.max(1, Math.round(performance.now() - tStart));
xbLog.info(MODULE_ID, `L0 pool done completed=${completed}/${total} failed=${failed} peakActive=${peakActive} elapsedMs=${elapsed}`);
}
// ═════════════════════════════════════════════════════════════════════
// ★ 限流恢复:重置进度,从头开始以限速模式慢慢跑
// ═════════════════════════════════════════════════════════════════════
if (rateLimited && !extractionCancelled) {
const waitSec = RATE_LIMIT_WAIT_MS / 1000;
xbLog.info(MODULE_ID, `限流保护:将重置进度并从头开始降速重来(并发=${RETRY_CONCURRENCY}, 间隔=${RETRY_INTERVAL_MS}ms`);
onProgress?.(`疑似限流,${waitSec}s 后降速重头开始...`, completed, total);
await new Promise(r => setTimeout(r, RATE_LIMIT_WAIT_MS));
if (!extractionCancelled) {
// ★ 核心逻辑:重置计数器,让 UI 从 0 开始跑,给用户“重头开始”的反馈
rateLimited = false;
consecutiveFailures = 0;
completed = 0;
failed = 0;
let retryNextIdx = 0;
xbLog.info(MODULE_ID, `限流恢复:开始降速模式扫描 ${pendingPairs.length} 个楼层`);
const retryWorkers = Math.min(RETRY_CONCURRENCY, pendingPairs.length);
const runRetryWorker = async (wid) => {
while (true) {
if (extractionCancelled) return;
const idx = retryNextIdx++;
if (idx >= pendingPairs.length) return;
const pair = pendingPairs[idx];
const floor = pair.aiFloor;
// ★ 检查该楼层状态
const st = getL0FloorStatus(floor);
if (st?.status === 'ok' || st?.status === 'empty') {
// 刚才已经成功了,直接跳过(仅增加进度计数)
completed++;
onProgress?.(`提取: ${completed}/${total} (跳过已完成)`, completed, total);
continue;
}
// ★ 没做过的,用 slow 模式处理
await processPair(pair, idx, `retry-${wid}`);
// 每个请求后休息,避免再次触发限流
if (idx < pendingPairs.length - 1 && RETRY_INTERVAL_MS > 0) {
await new Promise(r => setTimeout(r, RETRY_INTERVAL_MS));
}
}
};
await Promise.all(Array.from({ length: retryWorkers }, (_, i) => runRetryWorker(i)));
xbLog.info(MODULE_ID, `降速重头开始阶段结束`);
}
}
try {
saveMetadataDebounced?.();
} catch { }
// ★ Phase 2: 统一向量化所有新提取的 atoms
if (allNewAtoms.length > 0 && !extractionCancelled) {
onProgress?.(`向量化 L0: 0/${allNewAtoms.length}`, 0, allNewAtoms.length);
await vectorizeAtoms(chatId, allNewAtoms, (current, total) => {
onProgress?.(`向量化 L0: ${current}/${total}`, current, total);
});
}
xbLog.info(MODULE_ID, `L0 ${extractionCancelled ? '已取消' : '完成'}atoms=${builtAtoms}, completed=${completed}/${total}, failed=${failed}`);
return { built: builtAtoms };
}
// ============================================================================
// 向量化(支持进度回调)
// ============================================================================
async function vectorizeAtoms(chatId, atoms, onProgress) {
if (!atoms?.length) return;
const vectorCfg = getVectorConfig();
if (!vectorCfg?.enabled) return;
const semanticTexts = atoms.map(a => a.semantic);
const rTexts = atoms.map(a => buildRAggregateText(a));
const fingerprint = getEngineFingerprint(vectorCfg);
const batchSize = 20;
try {
const allVectors = [];
for (let i = 0; i < semanticTexts.length; i += batchSize) {
if (extractionCancelled) break;
const semBatch = semanticTexts.slice(i, i + batchSize);
const rBatch = rTexts.slice(i, i + batchSize);
const payload = semBatch.concat(rBatch);
const vectors = await embed(payload, { timeout: 30000 });
const split = semBatch.length;
if (!Array.isArray(vectors) || vectors.length < split * 2) {
throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
}
const semVectors = vectors.slice(0, split);
const rVectors = vectors.slice(split, split + split);
for (let j = 0; j < split; j++) {
allVectors.push({
vector: semVectors[j],
rVector: rVectors[j] || semVectors[j],
});
}
onProgress?.(allVectors.length, semanticTexts.length);
}
if (extractionCancelled) return;
const items = atoms.slice(0, allVectors.length).map((a, i) => ({
atomId: a.atomId,
floor: a.floor,
vector: allVectors[i].vector,
rVector: allVectors[i].rVector,
}));
await saveStateVectors(chatId, items, fingerprint);
xbLog.info(MODULE_ID, `L0 向量化完成: ${items.length}`);
} catch (e) {
xbLog.error(MODULE_ID, 'L0 向量化失败', e);
}
}
// ============================================================================
// 清空
// ============================================================================
export async function clearAllAtomsAndVectors(chatId) {
clearStateAtoms();
clearL0Index();
if (chatId) {
await clearStateVectors(chatId);
}
// ★ 立即保存
try {
saveMetadataDebounced?.();
} catch { }
xbLog.info(MODULE_ID, '已清空所有记忆锚点');
}
// ============================================================================
// 实时增量AI 消息后触发)- 保持不变
// ============================================================================
let extractionQueue = [];
let isProcessing = false;
export async function extractAndStoreAtomsForRound(aiFloor, aiMessage, userMessage, onComplete) {
const { chatId } = getContext();
if (!chatId) return;
const vectorCfg = getVectorConfig();
if (!vectorCfg?.enabled) return;
extractionQueue.push({ aiFloor, aiMessage, userMessage, chatId, onComplete });
processQueue();
}
async function processQueue() {
if (isProcessing || extractionQueue.length === 0) return;
isProcessing = true;
while (extractionQueue.length > 0) {
const { aiFloor, aiMessage, userMessage, chatId, onComplete } = extractionQueue.shift();
try {
const atoms = await extractAtomsForRound(userMessage, aiMessage, aiFloor, { timeout: 12000 });
if (!atoms?.length) {
xbLog.info(MODULE_ID, `floor ${aiFloor}: 无有效 atoms`);
onComplete?.({ floor: aiFloor, atomCount: 0 });
continue;
}
atoms.forEach(a => a.chatId = chatId);
saveStateAtoms(atoms);
// 单楼实时处理:立即向量化
await vectorizeAtomsSimple(chatId, atoms);
xbLog.info(MODULE_ID, `floor ${aiFloor}: ${atoms.length} atoms 已存储`);
onComplete?.({ floor: aiFloor, atomCount: atoms.length });
} catch (e) {
xbLog.error(MODULE_ID, `floor ${aiFloor} 处理失败`, e);
onComplete?.({ floor: aiFloor, atomCount: 0, error: e });
}
}
isProcessing = false;
}
// 简单向量化(无进度回调,用于单楼实时处理)
async function vectorizeAtomsSimple(chatId, atoms) {
if (!atoms?.length) return;
const vectorCfg = getVectorConfig();
if (!vectorCfg?.enabled) return;
const semanticTexts = atoms.map(a => a.semantic);
const rTexts = atoms.map(a => buildRAggregateText(a));
const fingerprint = getEngineFingerprint(vectorCfg);
try {
const vectors = await embed(semanticTexts.concat(rTexts), { timeout: 30000 });
const split = semanticTexts.length;
if (!Array.isArray(vectors) || vectors.length < split * 2) {
throw new Error(`embed length mismatch: expect>=${split * 2}, got=${vectors?.length || 0}`);
}
const semVectors = vectors.slice(0, split);
const rVectors = vectors.slice(split, split + split);
const items = atoms.map((a, i) => ({
atomId: a.atomId,
floor: a.floor,
vector: semVectors[i],
rVector: rVectors[i] || semVectors[i],
}));
await saveStateVectors(chatId, items, fingerprint);
} catch (e) {
xbLog.error(MODULE_ID, 'L0 向量化失败', e);
}
}
// ============================================================================
// 回滚钩子
// ============================================================================
async function handleStateRollback(floor) {
xbLog.info(MODULE_ID, `收到回滚请求: floor >= ${floor}`);
const { chatId } = getContext();
deleteStateAtomsFromFloor(floor);
deleteL0IndexFromFloor(floor);
if (chatId) {
await deleteStateVectorsFromFloor(chatId, floor);
}
}
// ============================================================================
// 兼容旧接口
// ============================================================================
export async function batchExtractAndStoreAtoms(chatId, chat, onProgress) {
if (!chatId || !chat?.length) return { built: 0 };
const vectorCfg = getVectorConfig();
if (!vectorCfg?.enabled) return { built: 0 };
xbLog.info(MODULE_ID, `开始批量 L0 提取: ${chat.length} 条消息`);
clearStateAtoms();
clearL0Index();
await clearStateVectors(chatId);
return await incrementalExtractAtoms(chatId, chat, onProgress);
}
export async function rebuildStateVectors(chatId, vectorCfg) {
if (!chatId || !vectorCfg?.enabled) return { built: 0 };
const atoms = getStateAtoms();
if (!atoms.length) return { built: 0 };
xbLog.info(MODULE_ID, `重建 L0 向量: ${atoms.length} 条 atom`);
await clearStateVectors(chatId);
await vectorizeAtomsSimple(chatId, atoms);
return { built: atoms.length };
}