feat: variables 2.0 state + L0 summary integration

This commit is contained in:
2026-01-31 23:06:03 +08:00
parent 201c74dc71
commit 4b0541610b
22 changed files with 1949 additions and 2314 deletions

View File

@@ -3,7 +3,7 @@
import Dexie from '../../../libs/dexie.mjs';
const DB_NAME = 'LittleWhiteBox_Memory';
const DB_VERSION = 2;
const DB_VERSION = 3; // 升级版本
// Chunk parameters
export const CHUNK_MAX_TOKENS = 200;
@@ -15,6 +15,7 @@ db.version(DB_VERSION).stores({
chunks: '[chatId+chunkId], chatId, [chatId+floor]',
chunkVectors: '[chatId+chunkId], chatId',
eventVectors: '[chatId+eventId], chatId',
stateVectors: '[chatId+atomId], chatId, [chatId+floor]', // L0 向量表
});
export { db };
@@ -22,3 +23,4 @@ export const metaTable = db.meta;
export const chunksTable = db.chunks;
export const chunkVectorsTable = db.chunkVectors;
export const eventVectorsTable = db.eventVectors;
export const stateVectorsTable = db.stateVectors;

View File

@@ -128,9 +128,16 @@ function formatArcLine(a) {
return `- ${a.name}${a.trajectory}`;
}
// 完整 chunk 输出(不截断
// 完整 chunk 输出(支持 L0 虚拟 chunk
// Render one recalled chunk as a complete (untruncated) display line.
// L0 virtual chunks carry a fixed pin tag; real L1 chunks are tagged with the speaker name.
function formatChunkFullLine(c) {
    const context = getContext();
    const userName = context.name1;
    const charName = context.name2;

    if (!c.isL0) {
        // Real L1 chunk: fall back to generic labels when the context has no names.
        const speaker = c.isUser ? (userName || "用户") : (charName || "角色");
        return ` #${c.floor + 1} [${speaker}] ${String(c.text || "").trim()}`;
    }

    // L0 virtual chunk (state atom): floors are shown 1-based like L1 chunks.
    return ` #${c.floor + 1} [📌] ${String(c.text || "").trim()}`;
}

View File

@@ -75,6 +75,8 @@ import {
syncOnMessageSwiped,
syncOnMessageReceived,
} from "./vector/chunk-builder.js";
import { initStateIntegration, rebuildStateVectors } from "./vector/state-integration.js";
import { clearStateVectors, getStateAtomsCount, getStateVectorsCount } from "./vector/state-store.js";
// vector io
import { exportVectors, importVectors } from "./vector/vector-io.js";
@@ -210,6 +212,8 @@ async function sendVectorStatsToFrame() {
const stats = await getStorageStats(chatId);
const chunkStatus = await getChunkBuildStatus();
const totalMessages = chat?.length || 0;
const stateAtomsCount = getStateAtomsCount();
const stateVectorsCount = await getStateVectorsCount(chatId);
const cfg = getVectorConfig();
let mismatch = false;
@@ -228,6 +232,8 @@ async function sendVectorStatsToFrame() {
builtFloors: chunkStatus.builtFloors,
totalFloors: chunkStatus.totalFloors,
totalMessages,
stateAtoms: stateAtomsCount,
stateVectors: stateVectorsCount,
},
mismatch,
});
@@ -350,6 +356,14 @@ async function handleGenerateVectors(vectorCfg) {
const batchSize = isLocal ? 5 : 25;
const concurrency = isLocal ? 1 : 2;
// L0 向量重建
try {
await rebuildStateVectors(chatId, vectorCfg);
} catch (e) {
xbLog.error(MODULE_ID, "L0 向量重建失败", e);
// 不阻塞,继续 L1/L2
}
await clearAllChunks(chatId);
await updateMeta(chatId, { lastChunkFloor: -1, fingerprint });
@@ -649,6 +663,7 @@ async function handleClearVectors() {
await clearEventVectors(chatId);
await clearAllChunks(chatId);
await clearStateVectors(chatId);
await updateMeta(chatId, { lastChunkFloor: -1 });
await sendVectorStatsToFrame();
await executeSlashCommand('/echo severity=info 向量数据已清除。如需恢复召回功能,请重新点击"生成向量"。');
@@ -1400,7 +1415,7 @@ async function handleGenerationStarted(type, _params, isDryRun) {
// 2) depth倒序插入从末尾往前数
// 最小为 1避免插入到最底部导致 AI 看到的最后是总结
const depth = Math.max(1, chatLen - boundary - 1);
const depth = Math.max(2, chatLen - boundary - 1);
if (depth < 0) return;
// 3) 构建注入文本(保持原逻辑)
@@ -1504,4 +1519,5 @@ $(document).on("xiaobaix:storySummary:toggle", (_e, enabled) => {
jQuery(() => {
if (!getSettings().storySummary?.enabled) return;
registerEvents();
initStateIntegration();
});

View File

@@ -50,8 +50,10 @@ export function chunkMessage(floor, message, maxTokens = CHUNK_MAX_TOKENS) {
// 1. 应用用户自定义过滤规则
// 2. 移除 TTS 标记(硬编码)
// 3. 移除 <state> 标签硬编码L0 已单独存储)
const cleanText = filterText(text)
.replace(/\[tts:[^\]]*\]/gi, '')
.replace(/<state>[\s\S]*?<\/state>/gi, '')
.trim();
if (!cleanText) return [];

View File

@@ -471,8 +471,6 @@ async function embedOnline(texts, provider, config, options = {}) {
const providerConfig = ONLINE_PROVIDERS[provider];
const baseUrl = (providerConfig?.baseUrl || url || '').replace(/\/+$/, '');
const reqId = Math.random().toString(36).slice(2, 6);
// 永远重试:指数退避 + 上限 + 抖动
const BASE_WAIT_MS = 1200;
const MAX_WAIT_MS = 15000;
@@ -491,9 +489,6 @@ async function embedOnline(texts, provider, config, options = {}) {
let attempt = 0;
while (true) {
attempt++;
const startTime = Date.now();
console.log(`[embed ${reqId}] send ${texts.length} items (attempt ${attempt})`);
try {
let response;
@@ -526,8 +521,6 @@ async function embedOnline(texts, provider, config, options = {}) {
});
}
console.log(`[embed ${reqId}] status=${response.status} time=${Date.now() - startTime}ms`);
// 需要“永远重试”的典型状态:
// - 429限流
// - 403配额/风控/未实名等(你提到的硅基未认证)
@@ -541,7 +534,6 @@ async function embedOnline(texts, provider, config, options = {}) {
const exp = Math.min(MAX_WAIT_MS, BASE_WAIT_MS * Math.pow(2, Math.min(attempt, 6) - 1));
const jitter = Math.floor(Math.random() * 350);
const waitMs = exp + jitter;
console.warn(`[embed ${reqId}] retryable error ${response.status}, wait ${waitMs}ms`);
await sleepAbortable(waitMs);
continue;
}
@@ -569,7 +561,6 @@ async function embedOnline(texts, provider, config, options = {}) {
const exp = Math.min(MAX_WAIT_MS, BASE_WAIT_MS * Math.pow(2, Math.min(attempt, 6) - 1));
const jitter = Math.floor(Math.random() * 350);
const waitMs = exp + jitter;
console.warn(`[embed ${reqId}] network/error, wait ${waitMs}ms then retry: ${e?.message || e}`);
await sleepAbortable(waitMs);
}
}

View File

@@ -12,6 +12,12 @@ import { xbLog } from '../../../core/debug-core.js';
import { getContext } from '../../../../../../extensions.js';
import { getSummaryStore } from '../data/store.js';
import { filterText } from './text-filter.js';
import {
searchStateAtoms,
buildL0FloorBonus,
stateToVirtualChunks,
mergeAndSparsify,
} from './state-recall.js';
const MODULE_ID = 'recall';
@@ -35,12 +41,16 @@ const CONFIG = {
MIN_SIMILARITY_EVENT: 0.65,
MMR_LAMBDA: 0.72,
BONUS_PARTICIPANT_HIT: 0.08,
BONUS_TEXT_HIT: 0.05,
BONUS_WORLD_TOPIC_HIT: 0.06,
FLOOR_LIMIT: 1,
};
BONUS_PARTICIPANT_HIT: 0.08,
BONUS_TEXT_HIT: 0.05,
BONUS_WORLD_TOPIC_HIT: 0.06,
// L0 配置
L0_FLOOR_BONUS_FACTOR: 0.10,
FLOOR_MAX_CHUNKS: 2,
FLOOR_LIMIT: 1,
};
// ═══════════════════════════════════════════════════════════════════════════
// 工具函数
@@ -136,10 +146,20 @@ function sortCausalEvents(causalArray) {
});
}
function normalize(s) {
return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim();
}
// Canonicalize text for matching: NFKC-fold, strip zero-width characters, then trim.
function normalize(s) {
    const raw = String(s || '');
    const folded = raw.normalize('NFKC');
    const withoutZeroWidth = folded.replace(/[\u200B-\u200D\uFEFF]/g, '');
    return withoutZeroWidth.trim();
}
// Parse the floor range referenced by an event summary: "(#321-322)" or "(#321)".
// Floor numbers in the text are 1-based; the returned indices are 0-based and clamped to >= 0.
// A reversed range such as "(#322-321)" is normalized so that start <= end; otherwise a
// caller iterating `for (f = start; f <= end; f++)` would never visit any floor.
// Returns null when the summary is empty or contains no "(#N)" / "(#N-M)" marker.
function parseFloorRange(summary) {
    if (!summary) return null;
    const match = String(summary).match(/\(#(\d+)(?:-(\d+))?\)/);
    if (!match) return null;
    const first = Math.max(0, parseInt(match[1], 10) - 1);
    // A single-floor marker yields a one-element range.
    const second = match[2] ? Math.max(0, parseInt(match[2], 10) - 1) : first;
    return { start: Math.min(first, second), end: Math.max(first, second) };
}
function cleanForRecall(text) {
// 1. 应用用户自定义过滤规则
// 2. 移除 TTS 标记(硬编码)
@@ -308,7 +328,7 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
// L1 Chunks 检索
// ═══════════════════════════════════════════════════════════════════════════
async function searchChunks(queryVector, vectorConfig) {
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map()) {
const { chatId } = getContext();
if (!chatId || !queryVector?.length) return [];
@@ -321,12 +341,18 @@ async function searchChunks(queryVector, vectorConfig) {
const scored = chunkVectors.map(cv => {
const match = String(cv.chunkId).match(/c-(\d+)-(\d+)/);
const floor = match ? parseInt(match[1], 10) : 0;
const baseSim = cosineSimilarity(queryVector, cv.vector);
const l0Bonus = l0FloorBonus.get(floor) || 0;
return {
_id: cv.chunkId,
chunkId: cv.chunkId,
floor: match ? parseInt(match[1], 10) : 0,
chunkIdx: match ? parseInt(match[2], 10) : 0,
similarity: cosineSimilarity(queryVector, cv.vector),
floor,
chunkIdx: match ? parseInt(match[2], 10) : 0,
similarity: baseSim + l0Bonus,
_baseSimilarity: baseSim,
_l0Bonus: l0Bonus,
vector: cv.vector,
};
});
@@ -403,30 +429,19 @@ async function searchChunks(queryVector, vectorConfig) {
// ═══════════════════════════════════════════════════════════════════════════
// L2 Events 检索
// ═══════════════════════════════════════════════════════════════════════════
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities) {
const { chatId, name1 } = getContext();
if (!chatId || !queryVector?.length) {
console.warn('[searchEvents] 早期返回: chatId或queryVector为空');
return [];
}
const meta = await getMeta(chatId);
const fp = getEngineFingerprint(vectorConfig);
console.log('[searchEvents] fingerprint检查:', {
metaFp: meta.fingerprint,
currentFp: fp,
match: meta.fingerprint === fp || !meta.fingerprint,
});
if (meta.fingerprint && meta.fingerprint !== fp) return [];
const eventVectors = await getAllEventVectors(chatId);
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
console.log('[searchEvents] 向量数据:', {
eventVectorsCount: eventVectors.length,
vectorMapSize: vectorMap.size,
allEventsCount: allEvents?.length,
});
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus = new Map()) {
const { chatId, name1 } = getContext();
if (!chatId || !queryVector?.length) {
return [];
}
const meta = await getMeta(chatId);
const fp = getEngineFingerprint(vectorConfig);
if (meta.fingerprint && meta.fingerprint !== fp) return [];
const eventVectors = await getAllEventVectors(chatId);
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
if (!vectorMap.size) return [];
const userName = normalize(name1);
@@ -458,11 +473,23 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
bonus += CONFIG.BONUS_TEXT_HIT;
reasons.push('text');
}
// world topic 命中
if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) {
bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
reasons.push('world');
// world topic 命中
if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) {
bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
reasons.push('world');
}
// L0 加权:事件覆盖楼层范围命中
const range = parseFloorRange(event.summary);
if (range) {
for (let f = range.start; f <= range.end; f++) {
if (l0FloorBonus.has(f)) {
bonus += l0FloorBonus.get(f);
reasons.push('L0');
break;
}
}
}
return {
@@ -477,15 +504,6 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
vector: v,
};
});
// 相似度分布日志
const simValues = scored.map(s => s.similarity).sort((a, b) => b - a);
console.log('[searchEvents] 相似度分布前20:', simValues.slice(0, 20));
console.log('[searchEvents] 相似度分布后20:', simValues.slice(-20));
console.log('[searchEvents] 有向量的事件数:', scored.filter(s => s.similarity > 0).length);
console.log('[searchEvents] sim >= 0.6:', scored.filter(s => s.similarity >= 0.6).length);
console.log('[searchEvents] sim >= 0.5:', scored.filter(s => s.similarity >= 0.5).length);
console.log('[searchEvents] sim >= 0.3:', scored.filter(s => s.similarity >= 0.3).length);
// ★ 记录过滤前的分布(用 finalScore与显示一致
const preFilterDistribution = {
@@ -503,7 +521,6 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
const candidates = scored
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
.sort((a, b) => b.finalScore - a.finalScore)
.slice(0, CONFIG.CANDIDATE_EVENTS);
.slice(0, CONFIG.CANDIDATE_EVENTS);
// 动态 K质量不够就少拿
@@ -575,7 +592,7 @@ function formatCausalTree(causalEvents, recalledEvents) {
// ═══════════════════════════════════════════════════════════════════════════
// 日志:主报告
// ═══════════════════════════════════════════════════════════════════════════
function formatRecallLog({ elapsed, segments, weights, chunkResults, eventResults, allEvents, queryEntities, causalEvents = [], chunkPreFilterStats = null, l0Results = [] }) {
const lines = [
'╔══════════════════════════════════════════════════════════════╗',
@@ -604,13 +621,36 @@ function formatRecallLog({ elapsed, segments, weights, chunkResults, eventResult
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
});
lines.push('');
lines.push('┌─────────────────────────────────────────────────────────────┐');
lines.push('│ 【提取实体】用于判断"亲身经历"(DIRECT) │');
lines.push('└─────────────────────────────────────────────────────────────┘');
lines.push(` ${queryEntities?.length ? queryEntities.join('、') : '(无)'}`);
lines.push('');
lines.push('');
lines.push('┌─────────────────────────────────────────────────────────────┐');
lines.push('│ 【提取实体】用于判断"亲身经历"(DIRECT) │');
lines.push('└─────────────────────────────────────────────────────────────┘');
lines.push(` ${queryEntities?.length ? queryEntities.join('、') : '(无)'}`);
lines.push('');
lines.push('┌─────────────────────────────────────────────────────────────┐');
lines.push('│ 【L0 语义锚点】状态变更加权信号 │');
lines.push('└─────────────────────────────────────────────────────────────┘');
if (l0Results.length) {
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
lines.push(` 召回: ${l0Results.length}`);
lines.push(` 影响楼层: ${l0Floors.join(', ')}L1/L2 候选在这些楼层获得 +${CONFIG.L0_FLOOR_BONUS_FACTOR} 加分)`);
lines.push('');
l0Results.slice(0, 10).forEach((r, i) => {
lines.push(` ${String(i + 1).padStart(2)}. #${r.floor} ${r.atom.semantic.slice(0, 50)}${r.atom.semantic.length > 50 ? '...' : ''}`);
lines.push(` 相似度: ${r.similarity.toFixed(3)}`);
});
if (l0Results.length > 10) {
lines.push(` ... 还有 ${l0Results.length - 10}`);
}
} else {
lines.push(' 召回: 0 条(无 L0 数据或未启用)');
}
lines.push('');
lines.push('┌─────────────────────────────────────────────────────────────┐');
lines.push('│ 【L1 原文片段】 │');
lines.push('└─────────────────────────────────────────────────────────────┘');
@@ -706,16 +746,36 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
if (!queryVector?.length) {
return { events: [], chunks: [], elapsed: Math.round(performance.now() - T0), logText: 'Empty query vector.' };
}
const lexicon = buildEntityLexicon(store, allEvents);
const queryEntities = extractEntities([queryText, ...segments].join('\n'), lexicon);
const lexicon = buildEntityLexicon(store, allEvents);
const queryEntities = extractEntities([queryText, ...segments].join('\n'), lexicon);
// ════════════════════════════════════════════════════════════════════════
// L0 召回
// ════════════════════════════════════════════════════════════════════════
let l0Results = [];
let l0FloorBonus = new Map();
let l0VirtualChunks = [];
try {
l0Results = await searchStateAtoms(queryVector, vectorConfig);
l0FloorBonus = buildL0FloorBonus(l0Results, CONFIG.L0_FLOOR_BONUS_FACTOR);
l0VirtualChunks = stateToVirtualChunks(l0Results);
} catch (e) {
xbLog.warn(MODULE_ID, 'L0 召回失败,降级处理', e);
}
const [chunkResults, eventResults] = await Promise.all([
searchChunks(queryVector, vectorConfig),
const [chunkResults, eventResults] = await Promise.all([
searchChunks(queryVector, vectorConfig, l0FloorBonus),
searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus),
]);
const chunkPreFilterStats = chunkResults._preFilterStats || null;
// ════════════════════════════════════════════════════════════════════════
// 合并 L0 虚拟 chunks 到 L1
// ════════════════════════════════════════════════════════════════════════
const mergedChunks = mergeAndSparsify(l0VirtualChunks, chunkResults, CONFIG.FLOOR_MAX_CHUNKS);
// ─────────────────────────────────────────────────────────────────────
// 因果链追溯:从 eventResults 出发找祖先事件
@@ -742,25 +802,26 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
sortCausalEvents(causalEvents);
const causalEventsTruncated = causalEvents.slice(0, CONFIG.CAUSAL_INJECT_MAX);
const elapsed = Math.round(performance.now() - T0);
const logText = formatRecallLog({
elapsed,
queryText,
segments,
weights,
const elapsed = Math.round(performance.now() - T0);
const logText = formatRecallLog({
elapsed,
queryText,
segments,
weights,
chunkResults: mergedChunks,
eventResults,
allEvents,
queryEntities,
causalEvents: causalEventsTruncated,
chunkPreFilterStats,
l0Results,
});
console.group('%c[Recall]', 'color: #7c3aed; font-weight: bold');
console.log(`Elapsed: ${elapsed}ms | Entities: ${queryEntities.join(', ') || '(none)'}`);
console.log(`L1: ${chunkResults.length} | L2: ${eventResults.length}/${allEvents.length} | Causal: ${causalEventsTruncated.length}`);
console.groupEnd();
});
console.group('%c[Recall]', 'color: #7c3aed; font-weight: bold');
console.log(`Elapsed: ${elapsed}ms | L0: ${l0Results.length} | Entities: ${queryEntities.join(', ') || '(none)'}`);
console.log(`L1: ${mergedChunks.length} | L2: ${eventResults.length}/${allEvents.length} | Causal: ${causalEventsTruncated.length}`);
console.groupEnd();
return { events: eventResults, causalEvents: causalEventsTruncated, chunks: mergedChunks, elapsed, logText, queryEntities, l0Results };
}

View File

@@ -0,0 +1,153 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - State Integration (L0)
// 事件监听 + 回滚钩子注册
// ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../extensions.js';
import { xbLog } from '../../../core/debug-core.js';
import {
saveStateAtoms,
saveStateVectors,
deleteStateAtomsFromFloor,
deleteStateVectorsFromFloor,
getStateAtoms,
clearStateVectors,
} from './state-store.js';
import { embed, getEngineFingerprint } from './embedder.js';
import { getVectorConfig } from '../data/config.js';
const MODULE_ID = 'state-integration';
let initialized = false;
// ═══════════════════════════════════════════════════════════════════════════
// 初始化
// ═══════════════════════════════════════════════════════════════════════════
/**
 * One-time wiring of the L0 state layer.
 * Subscribes to the state-atom event on `document` and publishes the rollback
 * hook on `globalThis`. Calls after the first one do nothing.
 */
export function initStateIntegration() {
    if (!initialized) {
        initialized = true;

        // Event emitted when new state atoms have been generated.
        const stateAtomsEvent = 'xiaobaix:variables:stateAtomsGenerated';
        $(document).on(stateAtomsEvent, handleStateAtomsGenerated);

        // Global hook invoked with a floor number when state is rolled back.
        globalThis.LWB_StateRollbackHook = handleStateRollback;

        xbLog.info(MODULE_ID, 'L0 状态层集成已初始化');
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// 事件处理
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Event handler for newly generated StateAtoms.
 *
 * Keeps only atoms belonging to the active chat, persists them via saveStateAtoms,
 * and — when vectors are enabled — embeds them into the state-vector store.
 * Only atoms that are actually present in the atom store after saving are
 * vectorized: saveStateAtoms skips invalid atoms, and embedding those would
 * produce vectors that have no matching atom.
 *
 * @param {*} e - event object supplied by the event system (unused)
 * @param {{atoms?: Array<object>}} data - event payload
 */
async function handleStateAtomsGenerated(e, data) {
    const { atoms } = data || {};
    if (!atoms?.length) return;

    const { chatId } = getContext();
    if (!chatId) return;

    const validAtoms = atoms.filter(a => a?.chatId === chatId);
    if (!validAtoms.length) {
        xbLog.warn(MODULE_ID, `atoms.chatId 不匹配,期望 ${chatId},跳过`);
        return;
    }

    xbLog.info(MODULE_ID, `收到 ${validAtoms.length} 个 StateAtom`);

    // 1. Persist into chat metadata.
    saveStateAtoms(validAtoms);

    // 2. Embed and store vectors, if the vector feature is enabled.
    const vectorCfg = getVectorConfig();
    if (!vectorCfg?.enabled) {
        xbLog.info(MODULE_ID, '向量未启用,跳过 L0 向量化');
        return;
    }

    // Skip atoms the store rejected so no orphan vectors are created.
    const persistedIds = new Set(getStateAtoms().map(a => a.atomId));
    const embeddable = validAtoms.filter(a => a.semantic && persistedIds.has(a.atomId));
    if (!embeddable.length) return;

    await vectorizeAtoms(chatId, embeddable, vectorCfg);
}
/**
 * Embed the `semantic` text of each atom and store the resulting vectors for the chat.
 * Embedding or storage failures are logged and swallowed; the vectors can be rebuilt
 * later through rebuildStateVectors.
 *
 * @param {string} chatId
 * @param {Array<{atomId, floor, semantic}>} atoms
 * @param {object} vectorCfg - passed to embed and getEngineFingerprint
 */
async function vectorizeAtoms(chatId, atoms, vectorCfg) {
    const semantics = atoms.map(atom => atom.semantic);
    const fingerprint = getEngineFingerprint(vectorCfg);

    try {
        const vectors = await embed(semantics, vectorCfg);

        // Pair every atom with the vector at the same index.
        const items = [];
        atoms.forEach((atom, index) => {
            items.push({
                atomId: atom.atomId,
                floor: atom.floor,
                vector: vectors[index],
            });
        });

        await saveStateVectors(chatId, items, fingerprint);
        xbLog.info(MODULE_ID, `L0 向量化完成: ${items.length}`);
    } catch (err) {
        // Non-blocking by design.
        xbLog.error(MODULE_ID, 'L0 向量化失败', err);
    }
}
// ═══════════════════════════════════════════════════════════════════════════
// 回滚钩子
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Rollback hook: drop all L0 data at or after `floor`.
 * Atoms are always removed from chat metadata; vectors are removed only
 * when the context reports an active chat id.
 *
 * @param {number} floor - first floor to discard
 */
async function handleStateRollback(floor) {
    xbLog.info(MODULE_ID, `收到回滚请求: floor >= ${floor}`);

    const context = getContext();
    const chatId = context.chatId;

    // 1. Atoms kept in chat metadata.
    deleteStateAtomsFromFloor(floor);

    // 2. Vectors kept in IndexedDB (needs a chat id).
    if (!chatId) return;
    await deleteStateVectorsFromFloor(chatId, floor);
}
// ═══════════════════════════════════════════════════════════════════════════
// 重建向量(供"生成向量"按钮调用)
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Rebuild every L0 state vector of a chat from the atoms held in chat metadata
 * (used by the "generate vectors" action).
 *
 * Existing vectors are cleared first — also when no atoms remain — so that vectors
 * left over from a previous engine or from deleted atoms never survive a rebuild.
 * Atoms are embedded in batches; a failing batch is logged and skipped while the
 * remaining batches are still processed.
 *
 * @param {string} chatId
 * @param {object} vectorCfg - vector config; nothing happens unless `enabled` is truthy.
 *                             `engine === 'local'` selects the smaller batch size.
 * @returns {Promise<{built: number}>} number of vectors written
 */
export async function rebuildStateVectors(chatId, vectorCfg) {
    if (!chatId || !vectorCfg?.enabled) return { built: 0 };

    // Always drop old vectors, even if there is nothing to rebuild from.
    await clearStateVectors(chatId);

    const atoms = getStateAtoms();
    if (!atoms.length) return { built: 0 };

    xbLog.info(MODULE_ID, `开始重建 L0 向量: ${atoms.length} 个 atom`);

    const fingerprint = getEngineFingerprint(vectorCfg);
    const batchSize = vectorCfg.engine === 'local' ? 5 : 25;
    let built = 0;

    for (let i = 0; i < atoms.length; i += batchSize) {
        const batch = atoms.slice(i, i + batchSize);
        const texts = batch.map(a => a.semantic);

        try {
            const vectors = await embed(texts, vectorCfg);
            const items = batch.map((a, j) => ({
                atomId: a.atomId,
                floor: a.floor,
                vector: vectors[j],
            }));
            await saveStateVectors(chatId, items, fingerprint);
            built += items.length;
        } catch (e) {
            // Report the real end of the batch, not an index past the array.
            const batchEnd = Math.min(i + batchSize, atoms.length);
            xbLog.error(MODULE_ID, `L0 向量化批次失败: ${i}-${batchEnd}`, e);
        }
    }

    xbLog.info(MODULE_ID, `L0 向量重建完成: ${built}/${atoms.length}`);
    return { built };
}

View File

@@ -0,0 +1,160 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - State Recall (L0)
// L0 语义锚点召回 + floor bonus + 虚拟 chunk 转换
// ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../extensions.js';
import { getAllStateVectors, getStateAtoms } from './state-store.js';
import { getMeta } from './chunk-store.js';
import { getEngineFingerprint } from './embedder.js';
import { xbLog } from '../../../core/debug-core.js';
const MODULE_ID = 'state-recall';
const CONFIG = {
MAX_RESULTS: 20,
MIN_SIMILARITY: 0.55,
};
// ═══════════════════════════════════════════════════════════════════════════
// 工具函数
// ═══════════════════════════════════════════════════════════════════════════
// Cosine similarity of two equal-length numeric vectors.
// Returns 0 for missing/empty input, mismatched lengths, or a zero-norm vector.
function cosineSimilarity(a, b) {
    const size = a?.length;
    if (!size || size !== b?.length) return 0;

    let dot = 0;
    let normA = 0;
    let normB = 0;
    let idx = 0;
    while (idx < a.length) {
        const x = a[idx];
        const y = b[idx];
        dot += x * y;
        normA += x * x;
        normB += y * y;
        idx += 1;
    }

    if (!normA || !normB) return 0;
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}
// ═══════════════════════════════════════════════════════════════════════════
// L0 向量检索
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Find the StateAtoms whose stored vectors are most similar to the query vector.
 *
 * Resolves to an empty array when there is no active chat, the query vector is
 * empty, the fingerprint stored in the chat meta differs from the current engine
 * fingerprint, or the chat has no state vectors. Vectors whose atom is no longer
 * present in the atom store are ignored.
 *
 * @param {ArrayLike<number>} queryVector - embedding of the query
 * @param {object} vectorConfig - passed to getEngineFingerprint
 * @returns {Promise<Array<{atomId, floor, similarity, atom}>>} hits with
 *   similarity >= CONFIG.MIN_SIMILARITY, sorted by similarity descending,
 *   at most CONFIG.MAX_RESULTS entries
 */
export async function searchStateAtoms(queryVector, vectorConfig) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length) return [];

    // Vectors built by a different engine are not comparable with the query.
    const meta = await getMeta(chatId);
    const currentFingerprint = getEngineFingerprint(vectorConfig);
    const storedFingerprint = meta.fingerprint;
    if (storedFingerprint && storedFingerprint !== currentFingerprint) {
        xbLog.warn(MODULE_ID, 'fingerprint 不匹配,跳过 L0 召回');
        return [];
    }

    const stateVectors = await getAllStateVectors(chatId);
    if (!stateVectors.length) return [];

    // Index atoms by id so every vector can be joined with its atom.
    const atomsById = new Map();
    for (const atom of getStateAtoms()) {
        atomsById.set(atom.atomId, atom);
    }

    const hits = [];
    for (const sv of stateVectors) {
        const atom = atomsById.get(sv.atomId);
        if (!atom) continue;

        const similarity = cosineSimilarity(queryVector, sv.vector);
        if (similarity >= CONFIG.MIN_SIMILARITY) {
            hits.push({ atomId: sv.atomId, floor: sv.floor, similarity, atom });
        }
    }

    hits.sort((x, y) => y.similarity - x.similarity);
    return hits.slice(0, CONFIG.MAX_RESULTS);
}
// ═══════════════════════════════════════════════════════════════════════════
// Floor Bonus 构建
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Build the floor -> bonus map for floors touched by L0 hits.
 * Every distinct floor gets the same flat `bonusFactor`, applied once per floor;
 * the similarity of the individual hits is not taken into account.
 *
 * @param {Array<{floor: number}>} l0Results - L0 hits (null/undefined treated as empty)
 * @param {number} [bonusFactor=0.10] - bonus assigned to each hit floor
 * @returns {Map<number, number>}
 */
export function buildL0FloorBonus(l0Results, bonusFactor = 0.10) {
    // A Set keeps first-seen order and drops repeated floors.
    const hitFloors = new Set((l0Results || []).map(hit => hit.floor));
    return new Map([...hitFloors].map(floor => [floor, bonusFactor]));
}
// ═══════════════════════════════════════════════════════════════════════════
// 虚拟 Chunk 转换
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Convert L0 hits into chunk-shaped objects ("virtual chunks") so they can be
 * handled together with real L1 chunks.
 *
 * @param {Array<{atomId, floor, similarity, atom}>} l0Results - null/undefined treated as empty
 * @returns {Array<object>} one virtual chunk per hit, in the same order
 */
export function stateToVirtualChunks(l0Results) {
    const virtualChunks = [];
    for (const hit of l0Results || []) {
        const { atomId, floor, similarity, atom } = hit;
        virtualChunks.push({
            chunkId: `state-${atomId}`,
            floor,
            chunkIdx: -1,        // negative so it orders before real chunks of the floor
            speaker: '📌',       // fixed marker instead of a speaker name
            isUser: false,
            text: atom.semantic,
            textHash: null,
            similarity,
            isL0: true,          // distinguishes virtual chunks from L1 chunks
            _atom: atom,         // original atom kept for reference
        });
    }
    return virtualChunks;
}
// ═══════════════════════════════════════════════════════════════════════════
// 每楼层稀疏去重
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Merge L0 virtual chunks with L1 chunks and keep at most `limit` chunks per floor.
 * Within a floor the chunks with the highest similarity win; the result is ordered
 * by similarity, highest first.
 *
 * @param {Array} l0Chunks - virtual chunks (null/undefined treated as empty)
 * @param {Array} l1Chunks - real chunks (null/undefined treated as empty)
 * @param {number} [limit=2] - maximum number of chunks kept per floor
 * @returns {Array} merged chunks
 */
export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
    const bySimilarityDesc = (x, y) => y.similarity - x.similarity;

    const virtualChunks = l0Chunks || [];
    const realChunks = l1Chunks || [];
    const pool = [...virtualChunks, ...realChunks];
    pool.sort(bySimilarityDesc);

    // Walk the pool best-first and fill one bucket per floor up to the limit.
    const buckets = new Map();
    for (const chunk of pool) {
        const bucket = buckets.has(chunk.floor) ? buckets.get(chunk.floor) : [];
        if (!(bucket.length < limit)) continue;
        bucket.push(chunk);
        buckets.set(chunk.floor, bucket);
    }

    // Buckets come out grouped by floor; restore the global similarity order.
    const kept = [...buckets.values()].flat();
    kept.sort(bySimilarityDesc);
    return kept;
}

View File

@@ -0,0 +1,187 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - State Store (L0)
// StateAtom 存 chat_metadata持久化
// StateVector 存 IndexedDB可重建
// ═══════════════════════════════════════════════════════════════════════════
import { saveMetadataDebounced } from '../../../../../../extensions.js';
import { chat_metadata } from '../../../../../../../script.js';
import { stateVectorsTable } from '../data/db.js';
import { EXT_ID } from '../../../core/constants.js';
import { xbLog } from '../../../core/debug-core.js';
const MODULE_ID = 'state-store';
// ═══════════════════════════════════════════════════════════════════════════
// 工具函数
// ═══════════════════════════════════════════════════════════════════════════
// Copy exactly the bytes covered by a typed-array view into a new ArrayBuffer
// (the view may be a window onto a larger buffer).
export function float32ToBuffer(arr) {
    const { buffer, byteOffset, byteLength } = arr;
    const endOffset = byteOffset + byteLength;
    return buffer.slice(byteOffset, endOffset);
}
// Wrap a stored buffer in a Float32Array.
export function bufferToFloat32(buffer) {
    const floats = new Float32Array(buffer);
    return floats;
}
// ═══════════════════════════════════════════════════════════════════════════
// StateAtom 操作chat_metadata
// ═══════════════════════════════════════════════════════════════════════════
// Return the stateAtoms array stored under this extension's key in chat_metadata,
// creating any missing level (extensions -> EXT_ID -> stateAtoms) on the way.
function ensureStateAtomsArray() {
    if (!chat_metadata.extensions) {
        chat_metadata.extensions = {};
    }
    const extensions = chat_metadata.extensions;

    if (!extensions[EXT_ID]) {
        extensions[EXT_ID] = {};
    }
    const ownSlot = extensions[EXT_ID];

    if (!ownSlot.stateAtoms) {
        ownSlot.stateAtoms = [];
    }
    return ownSlot.stateAtoms;
}
/**
 * Return the StateAtoms array of the current chat.
 * This is the live array held in chat metadata, not a copy.
 */
export function getStateAtoms() {
    const atoms = ensureStateAtomsArray();
    return atoms;
}
/**
 * Append new StateAtoms to the current chat, skipping invalid entries and ids
 * that are already stored. Metadata is saved (debounced) only when something
 * was actually added.
 *
 * An atom is accepted when it has a truthy `atomId`, a `floor` of type number
 * that is not below 0, and a truthy `semantic`.
 *
 * @param {Array<object>} atoms - candidates; null/empty input is a no-op
 */
export function saveStateAtoms(atoms) {
    if (!atoms?.length) return;

    const stored = ensureStateAtomsArray();
    const knownIds = new Set(stored.map(entry => entry.atomId));
    const sizeBefore = stored.length;

    for (const candidate of atoms) {
        const usable = Boolean(candidate?.atomId)
            && typeof candidate.floor === 'number'
            && !(candidate.floor < 0)
            && Boolean(candidate.semantic);

        if (!usable) {
            xbLog.warn(MODULE_ID, `跳过无效 atom: ${candidate?.atomId}`);
            continue;
        }

        // Deduplicate by id, including duplicates inside this same call.
        if (knownIds.has(candidate.atomId)) continue;
        stored.push(candidate);
        knownIds.add(candidate.atomId);
    }

    const added = stored.length - sizeBefore;
    if (added > 0) {
        saveMetadataDebounced();
        xbLog.info(MODULE_ID, `存储 ${added} 个 StateAtom`);
    }
}
/**
 * Remove every StateAtom at or after the given floor from chat metadata.
 *
 * A floor that is not a number (undefined, null, NaN, empty or non-numeric string)
 * is rejected: with such a value the `atom.floor < floor` comparison is false for
 * every atom, which would silently wipe the whole store. Numeric strings are
 * still accepted and converted.
 *
 * @param {number} floor - first floor to delete
 * @returns {number} number of atoms removed (0 when the floor is invalid)
 */
export function deleteStateAtomsFromFloor(floor) {
    const fromFloor = typeof floor === 'string' && floor.trim() !== '' ? Number(floor) : floor;
    if (typeof fromFloor !== 'number' || Number.isNaN(fromFloor)) {
        xbLog.warn(MODULE_ID, `忽略无效的回滚楼层: ${String(floor)}`);
        return 0;
    }

    const arr = ensureStateAtomsArray();
    const before = arr.length;

    const filtered = arr.filter(a => a.floor < fromFloor);
    chat_metadata.extensions[EXT_ID].stateAtoms = filtered;

    const deleted = before - filtered.length;
    if (deleted > 0) {
        saveMetadataDebounced();
        xbLog.info(MODULE_ID, `删除 ${deleted} 个 StateAtom (floor >= ${floor})`);
    }
    return deleted;
}
/**
 * Drop every StateAtom of the current chat by installing a fresh empty array.
 * Metadata is saved (debounced) only when there was something to remove.
 */
export function clearStateAtoms() {
    const removed = ensureStateAtomsArray().length;
    chat_metadata.extensions[EXT_ID].stateAtoms = [];

    if (!(removed > 0)) return;
    saveMetadataDebounced();
    xbLog.info(MODULE_ID, `清空 ${removed} 个 StateAtom`);
}
/**
 * Number of StateAtoms stored for the current chat.
 */
export function getStateAtomsCount() {
    const { length } = ensureStateAtomsArray();
    return length;
}
// ═══════════════════════════════════════════════════════════════════════════
// StateVector 操作IndexedDB
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Store (insert or overwrite) state vectors for a chat.
 * Each vector is converted to Float32 and saved as a raw buffer together with
 * its dimension count and the engine fingerprint.
 *
 * @param {string} chatId - nothing is written when falsy
 * @param {Array<{atomId, floor, vector}>} items - nothing is written when empty
 * @param {string} fingerprint - engine fingerprint recorded on every row
 */
export async function saveStateVectors(chatId, items, fingerprint) {
    if (!chatId || !items?.length) return;

    const records = [];
    for (const item of items) {
        const packed = float32ToBuffer(new Float32Array(item.vector));
        records.push({
            chatId,
            atomId: item.atomId,
            floor: item.floor,
            vector: packed,
            dims: item.vector.length,
            fingerprint,
        });
    }

    await stateVectorsTable.bulkPut(records);
    xbLog.info(MODULE_ID, `存储 ${records.length} 个 StateVector`);
}
/**
 * Load every state vector stored for a chat, with the stored buffer decoded
 * back into a Float32Array.
 *
 * @param {string} chatId - resolves to [] when falsy
 * @returns {Promise<Array<object>>} stored rows with `vector` as Float32Array
 */
export async function getAllStateVectors(chatId) {
    if (!chatId) return [];

    const rows = await stateVectorsTable.where('chatId').equals(chatId).toArray();

    const decoded = [];
    for (const row of rows) {
        decoded.push({ ...row, vector: bufferToFloat32(row.vector) });
    }
    return decoded;
}
/**
 * Delete the state vectors of a chat whose floor is at or after `floor`.
 *
 * @param {string} chatId - no-op when falsy
 * @param {number} floor - first floor to delete
 */
export async function deleteStateVectorsFromFloor(chatId, floor) {
    if (!chatId) return;

    const isAtOrAfterFloor = (row) => row.floor >= floor;
    const chatRows = stateVectorsTable.where('chatId').equals(chatId);
    const deleted = await chatRows.filter(isAtOrAfterFloor).delete();

    if (!(deleted > 0)) return;
    xbLog.info(MODULE_ID, `删除 ${deleted} 个 StateVector (floor >= ${floor})`);
}
/**
 * Delete every state vector stored for a chat.
 *
 * @param {string} chatId - no-op when falsy
 */
export async function clearStateVectors(chatId) {
    if (!chatId) return;

    const chatRows = stateVectorsTable.where('chatId').equals(chatId);
    const deleted = await chatRows.delete();

    if (!(deleted > 0)) return;
    xbLog.info(MODULE_ID, `清空 ${deleted} 个 StateVector`);
}
/**
 * Count the state vectors stored for a chat.
 *
 * @param {string} chatId
 * @returns {Promise<number>} 0 when chatId is falsy
 */
export async function getStateVectorsCount(chatId) {
    if (!chatId) return 0;

    const chatRows = stateVectorsTable.where('chatId').equals(chatId);
    const total = await chatRows.count();
    return total;
}

View File

@@ -18,6 +18,14 @@ import {
clearEventVectors,
saveEventVectors,
} from './chunk-store.js';
import {
getStateAtoms,
saveStateAtoms,
clearStateAtoms,
getAllStateVectors,
saveStateVectors,
clearStateVectors,
} from './state-store.js';
import { getEngineFingerprint } from './embedder.js';
import { getVectorConfig } from '../data/config.js';
@@ -81,13 +89,18 @@ export async function exportVectors(onProgress) {
const chunks = await getAllChunks(chatId);
const chunkVectors = await getAllChunkVectors(chatId);
const eventVectors = await getAllEventVectors(chatId);
const stateAtoms = getStateAtoms();
const stateVectors = await getAllStateVectors(chatId);
if (chunks.length === 0 && eventVectors.length === 0) {
if (chunkVectors.length === 0 && eventVectors.length === 0 && stateVectors.length === 0) {
throw new Error('没有可导出的向量数据');
}
// 确定维度
const dims = chunkVectors[0]?.vector?.length || eventVectors[0]?.vector?.length || 0;
const dims = chunkVectors[0]?.vector?.length
|| eventVectors[0]?.vector?.length
|| stateVectors[0]?.vector?.length
|| 0;
if (dims === 0) {
throw new Error('无法确定向量维度');
}
@@ -123,6 +136,14 @@ export async function exportVectors(onProgress) {
// event_vectors.bin
const eventVectorsOrdered = sortedEventVectors.map(ev => ev.vector);
// state vectors
const sortedStateVectors = [...stateVectors].sort((a, b) => String(a.atomId).localeCompare(String(b.atomId)));
const stateVectorsOrdered = sortedStateVectors.map(v => v.vector);
const stateVectorsJsonl = sortedStateVectors.map(v => JSON.stringify({
atomId: v.atomId,
floor: v.floor,
})).join('\n');
// manifest
const manifest = {
version: EXPORT_VERSION,
@@ -133,6 +154,8 @@ export async function exportVectors(onProgress) {
chunkCount: sortedChunks.length,
chunkVectorCount: chunkVectors.length,
eventCount: sortedEventVectors.length,
stateAtomCount: stateAtoms.length,
stateVectorCount: stateVectors.length,
lastChunkFloor: meta.lastChunkFloor ?? -1,
};
@@ -145,6 +168,11 @@ export async function exportVectors(onProgress) {
'chunk_vectors.bin': float32ToBytes(chunkVectorsOrdered, dims),
'events.jsonl': strToU8(eventsJsonl),
'event_vectors.bin': float32ToBytes(eventVectorsOrdered, dims),
'state_atoms.json': strToU8(JSON.stringify(stateAtoms)),
'state_vectors.jsonl': strToU8(stateVectorsJsonl),
'state_vectors.bin': stateVectorsOrdered.length
? float32ToBytes(stateVectorsOrdered, dims)
: new Uint8Array(0),
}, { level: 1 }); // 降低压缩级别,速度优先
onProgress?.('下载文件...');
@@ -238,6 +266,21 @@ export async function importVectors(file, onProgress) {
const eventVectorsBytes = unzipped['event_vectors.bin'];
const eventVectors = eventVectorsBytes ? bytesToFloat32(eventVectorsBytes, manifest.dims) : [];
// 解析 L0 state atoms
const stateAtoms = unzipped['state_atoms.json']
? JSON.parse(strFromU8(unzipped['state_atoms.json']))
: [];
// 解析 L0 state vectors metas
const stateVectorsJsonl = unzipped['state_vectors.jsonl'] ? strFromU8(unzipped['state_vectors.jsonl']) : '';
const stateVectorMetas = stateVectorsJsonl.split('\n').filter(Boolean).map(line => JSON.parse(line));
// 解析 L0 state vectors
const stateVectorsBytes = unzipped['state_vectors.bin'];
const stateVectors = (stateVectorsBytes && stateVectorMetas.length)
? bytesToFloat32(stateVectorsBytes, manifest.dims)
: [];
// 校验数量
if (chunkMetas.length !== chunkVectors.length) {
throw new Error(`chunk 数量不匹配: 元数据 ${chunkMetas.length}, 向量 ${chunkVectors.length}`);
@@ -245,12 +288,17 @@ export async function importVectors(file, onProgress) {
if (eventMetas.length !== eventVectors.length) {
throw new Error(`event 数量不匹配: 元数据 ${eventMetas.length}, 向量 ${eventVectors.length}`);
}
if (stateVectorMetas.length !== stateVectors.length) {
throw new Error(`state 向量数量不匹配: 元数据 ${stateVectorMetas.length}, 向量 ${stateVectors.length}`);
}
onProgress?.('清空旧数据...');
// 清空当前数据
await clearAllChunks(chatId);
await clearEventVectors(chatId);
await clearStateVectors(chatId);
clearStateAtoms();
onProgress?.('写入数据...');
@@ -284,13 +332,28 @@ export async function importVectors(file, onProgress) {
await saveEventVectors(chatId, eventVectorItems, manifest.fingerprint);
}
// 写入 state atoms
if (stateAtoms.length > 0) {
saveStateAtoms(stateAtoms);
}
// 写入 state vectors
if (stateVectorMetas.length > 0) {
const stateVectorItems = stateVectorMetas.map((meta, idx) => ({
atomId: meta.atomId,
floor: meta.floor,
vector: stateVectors[idx],
}));
await saveStateVectors(chatId, stateVectorItems, manifest.fingerprint);
}
// 更新 meta
await updateMeta(chatId, {
fingerprint: manifest.fingerprint,
lastChunkFloor: manifest.lastChunkFloor,
});
xbLog.info(MODULE_ID, `导入完成: ${chunkMetas.length} chunks, ${eventMetas.length} events`);
xbLog.info(MODULE_ID, `导入完成: ${chunkMetas.length} chunks, ${eventMetas.length} events, ${stateAtoms.length} state atoms`);
return {
chunkCount: chunkMetas.length,