feat: variables 2.0 state + L0 summary integration
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
import Dexie from '../../../libs/dexie.mjs';
|
||||
|
||||
const DB_NAME = 'LittleWhiteBox_Memory';
|
||||
const DB_VERSION = 2;
|
||||
const DB_VERSION = 3; // 升级版本
|
||||
|
||||
// Chunk parameters
|
||||
export const CHUNK_MAX_TOKENS = 200;
|
||||
@@ -15,6 +15,7 @@ db.version(DB_VERSION).stores({
|
||||
chunks: '[chatId+chunkId], chatId, [chatId+floor]',
|
||||
chunkVectors: '[chatId+chunkId], chatId',
|
||||
eventVectors: '[chatId+eventId], chatId',
|
||||
stateVectors: '[chatId+atomId], chatId, [chatId+floor]', // L0 向量表
|
||||
});
|
||||
|
||||
export { db };
|
||||
@@ -22,3 +23,4 @@ export const metaTable = db.meta;
|
||||
export const chunksTable = db.chunks;
|
||||
export const chunkVectorsTable = db.chunkVectors;
|
||||
export const eventVectorsTable = db.eventVectors;
|
||||
export const stateVectorsTable = db.stateVectors;
|
||||
|
||||
@@ -128,9 +128,16 @@ function formatArcLine(a) {
|
||||
return `- ${a.name}:${a.trajectory}`;
|
||||
}
|
||||
|
||||
// 完整 chunk 输出(不截断)
|
||||
// 完整 chunk 输出(支持 L0 虚拟 chunk)
|
||||
/**
 * Render one recalled chunk as a single, untruncated prompt line.
 *
 * L0 virtual chunks (`c.isL0`) are tagged with a pin marker; real L1 chunks
 * are tagged with the speaker name taken from the current context, falling
 * back to a generic user/character label when the name is empty.
 *
 * @param {object} c - Chunk with `floor`, `text`, and `isL0` / `isUser` flags.
 * @returns {string} Line of the form `› #<floor + 1> [<label>] <trimmed text>`.
 */
function formatChunkFullLine(c) {
    const { name1: userName, name2: charName } = getContext();

    let label;
    if (c.isL0) {
        // L0 virtual chunk
        label = "📌";
    } else if (c.isUser) {
        // Real L1 chunk spoken by the user
        label = userName || "用户";
    } else {
        // Real L1 chunk spoken by the character
        label = charName || "角色";
    }

    const floorNo = c.floor + 1;
    const body = String(c.text || "").trim();
    return `› #${floorNo} [${label}] ${body}`;
}
|
||||
|
||||
@@ -75,6 +75,8 @@ import {
|
||||
syncOnMessageSwiped,
|
||||
syncOnMessageReceived,
|
||||
} from "./vector/chunk-builder.js";
|
||||
import { initStateIntegration, rebuildStateVectors } from "./vector/state-integration.js";
|
||||
import { clearStateVectors, getStateAtomsCount, getStateVectorsCount } from "./vector/state-store.js";
|
||||
|
||||
// vector io
|
||||
import { exportVectors, importVectors } from "./vector/vector-io.js";
|
||||
@@ -210,6 +212,8 @@ async function sendVectorStatsToFrame() {
|
||||
const stats = await getStorageStats(chatId);
|
||||
const chunkStatus = await getChunkBuildStatus();
|
||||
const totalMessages = chat?.length || 0;
|
||||
const stateAtomsCount = getStateAtomsCount();
|
||||
const stateVectorsCount = await getStateVectorsCount(chatId);
|
||||
|
||||
const cfg = getVectorConfig();
|
||||
let mismatch = false;
|
||||
@@ -228,6 +232,8 @@ async function sendVectorStatsToFrame() {
|
||||
builtFloors: chunkStatus.builtFloors,
|
||||
totalFloors: chunkStatus.totalFloors,
|
||||
totalMessages,
|
||||
stateAtoms: stateAtomsCount,
|
||||
stateVectors: stateVectorsCount,
|
||||
},
|
||||
mismatch,
|
||||
});
|
||||
@@ -350,6 +356,14 @@ async function handleGenerateVectors(vectorCfg) {
|
||||
const batchSize = isLocal ? 5 : 25;
|
||||
const concurrency = isLocal ? 1 : 2;
|
||||
|
||||
// L0 向量重建
|
||||
try {
|
||||
await rebuildStateVectors(chatId, vectorCfg);
|
||||
} catch (e) {
|
||||
xbLog.error(MODULE_ID, "L0 向量重建失败", e);
|
||||
// 不阻塞,继续 L1/L2
|
||||
}
|
||||
|
||||
await clearAllChunks(chatId);
|
||||
await updateMeta(chatId, { lastChunkFloor: -1, fingerprint });
|
||||
|
||||
@@ -649,6 +663,7 @@ async function handleClearVectors() {
|
||||
|
||||
await clearEventVectors(chatId);
|
||||
await clearAllChunks(chatId);
|
||||
await clearStateVectors(chatId);
|
||||
await updateMeta(chatId, { lastChunkFloor: -1 });
|
||||
await sendVectorStatsToFrame();
|
||||
await executeSlashCommand('/echo severity=info 向量数据已清除。如需恢复召回功能,请重新点击"生成向量"。');
|
||||
@@ -1400,7 +1415,7 @@ async function handleGenerationStarted(type, _params, isDryRun) {
|
||||
|
||||
// 2) depth:倒序插入,从末尾往前数
|
||||
// 最小为 1,避免插入到最底部导致 AI 看到的最后是总结
|
||||
const depth = Math.max(1, chatLen - boundary - 1);
|
||||
const depth = Math.max(2, chatLen - boundary - 1);
|
||||
if (depth < 0) return;
|
||||
|
||||
// 3) 构建注入文本(保持原逻辑)
|
||||
@@ -1504,4 +1519,5 @@ $(document).on("xiaobaix:storySummary:toggle", (_e, enabled) => {
|
||||
jQuery(() => {
|
||||
if (!getSettings().storySummary?.enabled) return;
|
||||
registerEvents();
|
||||
initStateIntegration();
|
||||
});
|
||||
|
||||
@@ -50,8 +50,10 @@ export function chunkMessage(floor, message, maxTokens = CHUNK_MAX_TOKENS) {
|
||||
|
||||
// 1. 应用用户自定义过滤规则
|
||||
// 2. 移除 TTS 标记(硬编码)
|
||||
// 3. 移除 <state> 标签(硬编码,L0 已单独存储)
|
||||
const cleanText = filterText(text)
|
||||
.replace(/\[tts:[^\]]*\]/gi, '')
|
||||
.replace(/<state>[\s\S]*?<\/state>/gi, '')
|
||||
.trim();
|
||||
|
||||
if (!cleanText) return [];
|
||||
|
||||
@@ -471,8 +471,6 @@ async function embedOnline(texts, provider, config, options = {}) {
|
||||
const providerConfig = ONLINE_PROVIDERS[provider];
|
||||
const baseUrl = (providerConfig?.baseUrl || url || '').replace(/\/+$/, '');
|
||||
|
||||
const reqId = Math.random().toString(36).slice(2, 6);
|
||||
|
||||
// 永远重试:指数退避 + 上限 + 抖动
|
||||
const BASE_WAIT_MS = 1200;
|
||||
const MAX_WAIT_MS = 15000;
|
||||
@@ -491,9 +489,6 @@ async function embedOnline(texts, provider, config, options = {}) {
|
||||
let attempt = 0;
|
||||
while (true) {
|
||||
attempt++;
|
||||
const startTime = Date.now();
|
||||
console.log(`[embed ${reqId}] send ${texts.length} items (attempt ${attempt})`);
|
||||
|
||||
try {
|
||||
let response;
|
||||
|
||||
@@ -526,8 +521,6 @@ async function embedOnline(texts, provider, config, options = {}) {
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`[embed ${reqId}] status=${response.status} time=${Date.now() - startTime}ms`);
|
||||
|
||||
// 需要“永远重试”的典型状态:
|
||||
// - 429:限流
|
||||
// - 403:配额/风控/未实名等(你提到的硅基未认证)
|
||||
@@ -541,7 +534,6 @@ async function embedOnline(texts, provider, config, options = {}) {
|
||||
const exp = Math.min(MAX_WAIT_MS, BASE_WAIT_MS * Math.pow(2, Math.min(attempt, 6) - 1));
|
||||
const jitter = Math.floor(Math.random() * 350);
|
||||
const waitMs = exp + jitter;
|
||||
console.warn(`[embed ${reqId}] retryable error ${response.status}, wait ${waitMs}ms`);
|
||||
await sleepAbortable(waitMs);
|
||||
continue;
|
||||
}
|
||||
@@ -569,7 +561,6 @@ async function embedOnline(texts, provider, config, options = {}) {
|
||||
const exp = Math.min(MAX_WAIT_MS, BASE_WAIT_MS * Math.pow(2, Math.min(attempt, 6) - 1));
|
||||
const jitter = Math.floor(Math.random() * 350);
|
||||
const waitMs = exp + jitter;
|
||||
console.warn(`[embed ${reqId}] network/error, wait ${waitMs}ms then retry: ${e?.message || e}`);
|
||||
await sleepAbortable(waitMs);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,12 @@ import { xbLog } from '../../../core/debug-core.js';
|
||||
import { getContext } from '../../../../../../extensions.js';
|
||||
import { getSummaryStore } from '../data/store.js';
|
||||
import { filterText } from './text-filter.js';
|
||||
import {
|
||||
searchStateAtoms,
|
||||
buildL0FloorBonus,
|
||||
stateToVirtualChunks,
|
||||
mergeAndSparsify,
|
||||
} from './state-recall.js';
|
||||
|
||||
const MODULE_ID = 'recall';
|
||||
|
||||
@@ -35,12 +41,16 @@ const CONFIG = {
|
||||
MIN_SIMILARITY_EVENT: 0.65,
|
||||
MMR_LAMBDA: 0.72,
|
||||
|
||||
BONUS_PARTICIPANT_HIT: 0.08,
|
||||
BONUS_TEXT_HIT: 0.05,
|
||||
BONUS_WORLD_TOPIC_HIT: 0.06,
|
||||
|
||||
FLOOR_LIMIT: 1,
|
||||
};
|
||||
BONUS_PARTICIPANT_HIT: 0.08,
|
||||
BONUS_TEXT_HIT: 0.05,
|
||||
BONUS_WORLD_TOPIC_HIT: 0.06,
|
||||
|
||||
// L0 配置
|
||||
L0_FLOOR_BONUS_FACTOR: 0.10,
|
||||
FLOOR_MAX_CHUNKS: 2,
|
||||
|
||||
FLOOR_LIMIT: 1,
|
||||
};
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 工具函数
|
||||
@@ -136,10 +146,20 @@ function sortCausalEvents(causalArray) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Canonicalise a string for comparison.
 *
 * Falsy input becomes the empty string. The text is then NFKC-normalised,
 * stripped of zero-width characters (U+200B–U+200D and U+FEFF), and trimmed.
 *
 * Declared exactly once: a second top-level declaration with the same name
 * is a SyntaxError in an ES module.
 *
 * @param {*} s - Value to normalise.
 * @returns {string} Normalised text.
 */
function normalize(s) {
    return String(s || '')
        .normalize('NFKC')
        .replace(/[\u200B-\u200D\uFEFF]/g, '')
        .trim();
}
|
||||
|
||||
/**
 * Parse the floor range embedded in an event summary: `(#321-322)` or `(#321)`.
 *
 * Floors in the summary are 1-based; the returned bounds are 0-based and
 * clamped at 0. The bounds are always ordered (`start <= end`), so a
 * reversed marker such as `(#9-3)` still yields a range that a
 * `for (f = start; f <= end; f++)` loop can walk.
 *
 * @param {*} summary - Summary text that may contain a floor marker.
 * @returns {{start: number, end: number} | null} Inclusive 0-based range,
 *   or null when there is no summary or no marker.
 */
function parseFloorRange(summary) {
    if (!summary) return null;

    const match = String(summary).match(/\(#(\d+)(?:-(\d+))?\)/);
    if (!match) return null;

    // Convert a 1-based floor label to a 0-based index, never below 0.
    const toIndex = (digits) => Math.max(0, Number.parseInt(digits, 10) - 1);

    const first = toIndex(match[1]);
    const last = match[2] === undefined ? first : toIndex(match[2]);

    return { start: Math.min(first, last), end: Math.max(first, last) };
}
|
||||
|
||||
function cleanForRecall(text) {
|
||||
// 1. 应用用户自定义过滤规则
|
||||
// 2. 移除 TTS 标记(硬编码)
|
||||
@@ -308,7 +328,7 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
|
||||
// L1 Chunks 检索
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
async function searchChunks(queryVector, vectorConfig) {
|
||||
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map()) {
|
||||
const { chatId } = getContext();
|
||||
if (!chatId || !queryVector?.length) return [];
|
||||
|
||||
@@ -321,12 +341,18 @@ async function searchChunks(queryVector, vectorConfig) {
|
||||
|
||||
const scored = chunkVectors.map(cv => {
|
||||
const match = String(cv.chunkId).match(/c-(\d+)-(\d+)/);
|
||||
const floor = match ? parseInt(match[1], 10) : 0;
|
||||
const baseSim = cosineSimilarity(queryVector, cv.vector);
|
||||
const l0Bonus = l0FloorBonus.get(floor) || 0;
|
||||
|
||||
return {
|
||||
_id: cv.chunkId,
|
||||
chunkId: cv.chunkId,
|
||||
floor: match ? parseInt(match[1], 10) : 0,
|
||||
chunkIdx: match ? parseInt(match[2], 10) : 0,
|
||||
similarity: cosineSimilarity(queryVector, cv.vector),
|
||||
floor,
|
||||
chunkIdx: match ? parseInt(match[2], 10) : 0,
|
||||
similarity: baseSim + l0Bonus,
|
||||
_baseSimilarity: baseSim,
|
||||
_l0Bonus: l0Bonus,
|
||||
vector: cv.vector,
|
||||
};
|
||||
});
|
||||
@@ -403,30 +429,19 @@ async function searchChunks(queryVector, vectorConfig) {
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// L2 Events 检索
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities) {
|
||||
const { chatId, name1 } = getContext();
|
||||
if (!chatId || !queryVector?.length) {
|
||||
console.warn('[searchEvents] 早期返回: chatId或queryVector为空');
|
||||
return [];
|
||||
}
|
||||
|
||||
const meta = await getMeta(chatId);
|
||||
const fp = getEngineFingerprint(vectorConfig);
|
||||
console.log('[searchEvents] fingerprint检查:', {
|
||||
metaFp: meta.fingerprint,
|
||||
currentFp: fp,
|
||||
match: meta.fingerprint === fp || !meta.fingerprint,
|
||||
});
|
||||
if (meta.fingerprint && meta.fingerprint !== fp) return [];
|
||||
|
||||
const eventVectors = await getAllEventVectors(chatId);
|
||||
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
|
||||
console.log('[searchEvents] 向量数据:', {
|
||||
eventVectorsCount: eventVectors.length,
|
||||
vectorMapSize: vectorMap.size,
|
||||
allEventsCount: allEvents?.length,
|
||||
});
|
||||
|
||||
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus = new Map()) {
|
||||
const { chatId, name1 } = getContext();
|
||||
if (!chatId || !queryVector?.length) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const meta = await getMeta(chatId);
|
||||
const fp = getEngineFingerprint(vectorConfig);
|
||||
if (meta.fingerprint && meta.fingerprint !== fp) return [];
|
||||
|
||||
const eventVectors = await getAllEventVectors(chatId);
|
||||
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
|
||||
if (!vectorMap.size) return [];
|
||||
|
||||
const userName = normalize(name1);
|
||||
@@ -458,11 +473,23 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
|
||||
bonus += CONFIG.BONUS_TEXT_HIT;
|
||||
reasons.push('text');
|
||||
}
|
||||
|
||||
// world topic 命中
|
||||
if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) {
|
||||
bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
|
||||
reasons.push('world');
|
||||
|
||||
// world topic 命中
|
||||
if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) {
|
||||
bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
|
||||
reasons.push('world');
|
||||
}
|
||||
|
||||
// L0 加权:事件覆盖楼层范围命中
|
||||
const range = parseFloorRange(event.summary);
|
||||
if (range) {
|
||||
for (let f = range.start; f <= range.end; f++) {
|
||||
if (l0FloorBonus.has(f)) {
|
||||
bonus += l0FloorBonus.get(f);
|
||||
reasons.push('L0');
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -477,15 +504,6 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
|
||||
vector: v,
|
||||
};
|
||||
});
|
||||
|
||||
// 相似度分布日志
|
||||
const simValues = scored.map(s => s.similarity).sort((a, b) => b - a);
|
||||
console.log('[searchEvents] 相似度分布(前20):', simValues.slice(0, 20));
|
||||
console.log('[searchEvents] 相似度分布(后20):', simValues.slice(-20));
|
||||
console.log('[searchEvents] 有向量的事件数:', scored.filter(s => s.similarity > 0).length);
|
||||
console.log('[searchEvents] sim >= 0.6:', scored.filter(s => s.similarity >= 0.6).length);
|
||||
console.log('[searchEvents] sim >= 0.5:', scored.filter(s => s.similarity >= 0.5).length);
|
||||
console.log('[searchEvents] sim >= 0.3:', scored.filter(s => s.similarity >= 0.3).length);
|
||||
|
||||
// ★ 记录过滤前的分布(用 finalScore,与显示一致)
|
||||
const preFilterDistribution = {
|
||||
@@ -503,7 +521,6 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
|
||||
const candidates = scored
|
||||
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
|
||||
.sort((a, b) => b.finalScore - a.finalScore)
|
||||
.slice(0, CONFIG.CANDIDATE_EVENTS);
|
||||
.slice(0, CONFIG.CANDIDATE_EVENTS);
|
||||
|
||||
// 动态 K:质量不够就少拿
|
||||
@@ -575,7 +592,7 @@ function formatCausalTree(causalEvents, recalledEvents) {
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 日志:主报告
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
function formatRecallLog({ elapsed, segments, weights, chunkResults, eventResults, allEvents, queryEntities, causalEvents = [], chunkPreFilterStats = null, l0Results = [] }) {
|
||||
const lines = [
|
||||
'╔══════════════════════════════════════════════════════════════╗',
|
||||
@@ -604,13 +621,36 @@ function formatRecallLog({ elapsed, segments, weights, chunkResults, eventResult
|
||||
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
|
||||
});
|
||||
|
||||
lines.push('');
|
||||
lines.push('┌─────────────────────────────────────────────────────────────┐');
|
||||
lines.push('│ 【提取实体】用于判断"亲身经历"(DIRECT) │');
|
||||
lines.push('└─────────────────────────────────────────────────────────────┘');
|
||||
lines.push(` ${queryEntities?.length ? queryEntities.join('、') : '(无)'}`);
|
||||
|
||||
lines.push('');
|
||||
lines.push('');
|
||||
lines.push('┌─────────────────────────────────────────────────────────────┐');
|
||||
lines.push('│ 【提取实体】用于判断"亲身经历"(DIRECT) │');
|
||||
lines.push('└─────────────────────────────────────────────────────────────┘');
|
||||
lines.push(` ${queryEntities?.length ? queryEntities.join('、') : '(无)'}`);
|
||||
|
||||
lines.push('');
|
||||
lines.push('┌─────────────────────────────────────────────────────────────┐');
|
||||
lines.push('│ 【L0 语义锚点】状态变更加权信号 │');
|
||||
lines.push('└─────────────────────────────────────────────────────────────┘');
|
||||
|
||||
if (l0Results.length) {
|
||||
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
|
||||
lines.push(` 召回: ${l0Results.length} 条`);
|
||||
lines.push(` 影响楼层: ${l0Floors.join(', ')}(L1/L2 候选在这些楼层获得 +${CONFIG.L0_FLOOR_BONUS_FACTOR} 加分)`);
|
||||
lines.push('');
|
||||
|
||||
l0Results.slice(0, 10).forEach((r, i) => {
|
||||
lines.push(` ${String(i + 1).padStart(2)}. #${r.floor} ${r.atom.semantic.slice(0, 50)}${r.atom.semantic.length > 50 ? '...' : ''}`);
|
||||
lines.push(` 相似度: ${r.similarity.toFixed(3)}`);
|
||||
});
|
||||
|
||||
if (l0Results.length > 10) {
|
||||
lines.push(` ... 还有 ${l0Results.length - 10} 条`);
|
||||
}
|
||||
} else {
|
||||
lines.push(' 召回: 0 条(无 L0 数据或未启用)');
|
||||
}
|
||||
|
||||
lines.push('');
|
||||
lines.push('┌─────────────────────────────────────────────────────────────┐');
|
||||
lines.push('│ 【L1 原文片段】 │');
|
||||
lines.push('└─────────────────────────────────────────────────────────────┘');
|
||||
@@ -706,16 +746,36 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
||||
if (!queryVector?.length) {
|
||||
return { events: [], chunks: [], elapsed: Math.round(performance.now() - T0), logText: 'Empty query vector.' };
|
||||
}
|
||||
|
||||
const lexicon = buildEntityLexicon(store, allEvents);
|
||||
const queryEntities = extractEntities([queryText, ...segments].join('\n'), lexicon);
|
||||
|
||||
const lexicon = buildEntityLexicon(store, allEvents);
|
||||
const queryEntities = extractEntities([queryText, ...segments].join('\n'), lexicon);
|
||||
|
||||
// ════════════════════════════════════════════════════════════════════════
|
||||
// L0 召回
|
||||
// ════════════════════════════════════════════════════════════════════════
|
||||
let l0Results = [];
|
||||
let l0FloorBonus = new Map();
|
||||
let l0VirtualChunks = [];
|
||||
|
||||
try {
|
||||
l0Results = await searchStateAtoms(queryVector, vectorConfig);
|
||||
l0FloorBonus = buildL0FloorBonus(l0Results, CONFIG.L0_FLOOR_BONUS_FACTOR);
|
||||
l0VirtualChunks = stateToVirtualChunks(l0Results);
|
||||
} catch (e) {
|
||||
xbLog.warn(MODULE_ID, 'L0 召回失败,降级处理', e);
|
||||
}
|
||||
|
||||
const [chunkResults, eventResults] = await Promise.all([
|
||||
searchChunks(queryVector, vectorConfig),
|
||||
const [chunkResults, eventResults] = await Promise.all([
|
||||
searchChunks(queryVector, vectorConfig, l0FloorBonus),
|
||||
searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus),
|
||||
]);
|
||||
|
||||
const chunkPreFilterStats = chunkResults._preFilterStats || null;
|
||||
|
||||
// ════════════════════════════════════════════════════════════════════════
|
||||
// 合并 L0 虚拟 chunks 到 L1
|
||||
// ════════════════════════════════════════════════════════════════════════
|
||||
const mergedChunks = mergeAndSparsify(l0VirtualChunks, chunkResults, CONFIG.FLOOR_MAX_CHUNKS);
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// 因果链追溯:从 eventResults 出发找祖先事件
|
||||
@@ -742,25 +802,26 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
||||
sortCausalEvents(causalEvents);
|
||||
const causalEventsTruncated = causalEvents.slice(0, CONFIG.CAUSAL_INJECT_MAX);
|
||||
|
||||
const elapsed = Math.round(performance.now() - T0);
|
||||
const logText = formatRecallLog({
|
||||
elapsed,
|
||||
queryText,
|
||||
segments,
|
||||
weights,
|
||||
const elapsed = Math.round(performance.now() - T0);
|
||||
const logText = formatRecallLog({
|
||||
elapsed,
|
||||
queryText,
|
||||
segments,
|
||||
weights,
|
||||
chunkResults: mergedChunks,
|
||||
eventResults,
|
||||
allEvents,
|
||||
queryEntities,
|
||||
causalEvents: causalEventsTruncated,
|
||||
chunkPreFilterStats,
|
||||
l0Results,
|
||||
});
|
||||
|
||||
console.group('%c[Recall]', 'color: #7c3aed; font-weight: bold');
|
||||
console.log(`Elapsed: ${elapsed}ms | Entities: ${queryEntities.join(', ') || '(none)'}`);
|
||||
console.log(`L1: ${chunkResults.length} | L2: ${eventResults.length}/${allEvents.length} | Causal: ${causalEventsTruncated.length}`);
|
||||
console.groupEnd();
|
||||
|
||||
});
|
||||
|
||||
console.group('%c[Recall]', 'color: #7c3aed; font-weight: bold');
|
||||
console.log(`Elapsed: ${elapsed}ms | L0: ${l0Results.length} | Entities: ${queryEntities.join(', ') || '(none)'}`);
|
||||
console.log(`L1: ${mergedChunks.length} | L2: ${eventResults.length}/${allEvents.length} | Causal: ${causalEventsTruncated.length}`);
|
||||
console.groupEnd();
|
||||
|
||||
return { events: eventResults, causalEvents: causalEventsTruncated, chunks: mergedChunks, elapsed, logText, queryEntities, l0Results };
|
||||
}
|
||||
|
||||
|
||||
153
modules/story-summary/vector/state-integration.js
Normal file
153
modules/story-summary/vector/state-integration.js
Normal file
@@ -0,0 +1,153 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Story Summary - State Integration (L0)
|
||||
// 事件监听 + 回滚钩子注册
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getContext } from '../../../../../../extensions.js';
|
||||
import { xbLog } from '../../../core/debug-core.js';
|
||||
import {
|
||||
saveStateAtoms,
|
||||
saveStateVectors,
|
||||
deleteStateAtomsFromFloor,
|
||||
deleteStateVectorsFromFloor,
|
||||
getStateAtoms,
|
||||
clearStateVectors,
|
||||
} from './state-store.js';
|
||||
import { embed, getEngineFingerprint } from './embedder.js';
|
||||
import { getVectorConfig } from '../data/config.js';
|
||||
|
||||
const MODULE_ID = 'state-integration';
|
||||
|
||||
let initialized = false;
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 初始化
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Wire the L0 state layer into the page, at most once per module load.
 *
 * Subscribes to the `xiaobaix:variables:stateAtomsGenerated` document event
 * and publishes the rollback handler as `globalThis.LWB_StateRollbackHook`.
 * Repeat calls are no-ops.
 */
export function initStateIntegration() {
    if (!initialized) {
        initialized = true;

        // Listen for atoms emitted by the variables module.
        $(document).on('xiaobaix:variables:stateAtomsGenerated', handleStateAtomsGenerated);

        // Register the rollback hook.
        globalThis.LWB_StateRollbackHook = handleStateRollback;

        xbLog.info(MODULE_ID, 'L0 状态层集成已初始化');
    }
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 事件处理
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Event handler for freshly generated StateAtoms.
 *
 * Keeps only atoms whose `chatId` equals the active chat, hands them to
 * `saveStateAtoms`, and — when the vector config is enabled — to
 * `vectorizeAtoms`.
 *
 * @param {*} e - Event object (unused).
 * @param {{atoms?: Array<object>}} [data] - Event payload.
 * @returns {Promise<void>}
 */
async function handleStateAtomsGenerated(e, data) {
    const incoming = (data || {}).atoms;
    if (!incoming?.length) return;

    const activeChatId = getContext().chatId;
    if (!activeChatId) return;

    const ownAtoms = incoming.filter((candidate) => candidate?.chatId === activeChatId);
    if (ownAtoms.length === 0) {
        xbLog.warn(MODULE_ID, `atoms.chatId 不匹配,期望 ${activeChatId},跳过`);
        return;
    }

    xbLog.info(MODULE_ID, `收到 ${ownAtoms.length} 个 StateAtom`);

    // 1. Store the atoms via saveStateAtoms.
    saveStateAtoms(ownAtoms);

    // 2. Embed them and store the vectors, if vectors are enabled.
    const vectorCfg = getVectorConfig();
    if (vectorCfg?.enabled) {
        await vectorizeAtoms(activeChatId, ownAtoms, vectorCfg);
        return;
    }

    xbLog.info(MODULE_ID, '向量未启用,跳过 L0 向量化');
}
|
||||
|
||||
/**
 * Embed a batch of StateAtoms and store the resulting vectors.
 *
 * Only atoms with a non-empty `atomId`, a non-negative numeric `floor` and a
 * non-empty `semantic` text are embedded, so a malformed atom cannot produce
 * a vector row with an undefined key or poison the embedding request. The
 * embedder must return exactly one vector per text; anything else is treated
 * as a failure so no `vector: undefined` rows are written.
 *
 * Failures are logged and swallowed on purpose: the vectors can be rebuilt
 * later via the "generate vectors" action.
 *
 * @param {string} chatId - Chat the vectors belong to.
 * @param {Array<object>} atoms - Atoms to embed.
 * @param {object} vectorCfg - Vector engine configuration passed to `embed`.
 * @returns {Promise<void>}
 */
async function vectorizeAtoms(chatId, atoms, vectorCfg) {
    const embeddable = (atoms || []).filter(
        (a) => a?.atomId && typeof a.floor === 'number' && a.floor >= 0 && a.semantic,
    );
    if (!embeddable.length) return;

    const texts = embeddable.map((a) => a.semantic);
    const fingerprint = getEngineFingerprint(vectorCfg);

    try {
        const vectors = await embed(texts, vectorCfg);
        if (vectors?.length !== texts.length) {
            throw new Error(`embed returned ${vectors?.length ?? 0} vectors for ${texts.length} texts`);
        }

        const items = embeddable.map((a, i) => ({
            atomId: a.atomId,
            floor: a.floor,
            vector: vectors[i],
        }));

        await saveStateVectors(chatId, items, fingerprint);
        xbLog.info(MODULE_ID, `L0 向量化完成: ${items.length} 个`);
    } catch (e) {
        xbLog.error(MODULE_ID, 'L0 向量化失败', e);
        // Non-blocking: vectors can be rebuilt later through "generate vectors".
    }
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 回滚钩子
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Rollback hook: drop L0 data from `floor` onward.
 *
 * Atoms are always removed through `deleteStateAtomsFromFloor`; vectors are
 * removed only when the context reports an active chat id.
 *
 * @param {number} floor - First floor to roll back.
 * @returns {Promise<void>}
 */
async function handleStateRollback(floor) {
    xbLog.info(MODULE_ID, `收到回滚请求: floor >= ${floor}`);

    const activeChatId = getContext().chatId;

    // 1. Remove the atoms.
    deleteStateAtomsFromFloor(floor);

    // 2. Remove the stored vectors; there is nothing to target without a chat.
    if (!activeChatId) return;
    await deleteStateVectorsFromFloor(activeChatId, floor);
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 重建向量(供"生成向量"按钮调用)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Rebuild every L0 vector for a chat (called by the "generate vectors" action).
 *
 * Clears the stored vectors, then re-embeds the current StateAtoms in batches
 * (5 per batch for the local engine, 25 otherwise). A failing batch is logged
 * and skipped so the remaining batches still run.
 *
 * The atom list is copied up front: `getStateAtoms()` hands back a live array
 * and this function awaits between batches, so iterating the original would
 * let concurrent appends or replacements change the work set mid-loop. Each
 * batch must get exactly one vector per atom, otherwise it counts as failed.
 *
 * @param {string} chatId - Chat whose vectors are rebuilt.
 * @param {object} vectorCfg - Vector configuration (`enabled`, `engine`, ...).
 * @returns {Promise<{built: number}>} Number of vectors written.
 */
export async function rebuildStateVectors(chatId, vectorCfg) {
    if (!chatId || !vectorCfg?.enabled) return { built: 0 };

    const atoms = [...getStateAtoms()];
    if (!atoms.length) return { built: 0 };

    xbLog.info(MODULE_ID, `开始重建 L0 向量: ${atoms.length} 个 atom`);

    // Drop the old vectors first.
    await clearStateVectors(chatId);

    // Re-embed batch by batch.
    const fingerprint = getEngineFingerprint(vectorCfg);
    const batchSize = vectorCfg.engine === 'local' ? 5 : 25;
    let built = 0;

    for (let start = 0; start < atoms.length; start += batchSize) {
        // Clamp so the logged range matches the atoms actually in the batch.
        const end = Math.min(start + batchSize, atoms.length);
        const batch = atoms.slice(start, end);
        const texts = batch.map((a) => a.semantic);

        try {
            const vectors = await embed(texts, vectorCfg);
            if (vectors?.length !== batch.length) {
                throw new Error(`embed returned ${vectors?.length ?? 0} vectors for ${batch.length} texts`);
            }

            const items = batch.map((a, j) => ({
                atomId: a.atomId,
                floor: a.floor,
                vector: vectors[j],
            }));

            await saveStateVectors(chatId, items, fingerprint);
            built += items.length;
        } catch (e) {
            xbLog.error(MODULE_ID, `L0 向量化批次失败: ${start}-${end}`, e);
        }
    }

    xbLog.info(MODULE_ID, `L0 向量重建完成: ${built}/${atoms.length}`);
    return { built };
}
|
||||
160
modules/story-summary/vector/state-recall.js
Normal file
160
modules/story-summary/vector/state-recall.js
Normal file
@@ -0,0 +1,160 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Story Summary - State Recall (L0)
|
||||
// L0 语义锚点召回 + floor bonus + 虚拟 chunk 转换
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getContext } from '../../../../../../extensions.js';
|
||||
import { getAllStateVectors, getStateAtoms } from './state-store.js';
|
||||
import { getMeta } from './chunk-store.js';
|
||||
import { getEngineFingerprint } from './embedder.js';
|
||||
import { xbLog } from '../../../core/debug-core.js';
|
||||
|
||||
const MODULE_ID = 'state-recall';
|
||||
|
||||
const CONFIG = {
|
||||
MAX_RESULTS: 20,
|
||||
MIN_SIMILARITY: 0.55,
|
||||
};
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 工具函数
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} The similarity, or 0 when either vector is missing or
 *   empty, the lengths differ, or either squared norm is falsy.
 */
function cosineSimilarity(a, b) {
    const comparable = a?.length && b?.length && a.length === b.length;
    if (!comparable) return 0;

    let dot = 0;
    let normA = 0;
    let normB = 0;

    let idx = 0;
    while (idx < a.length) {
        const x = a[idx];
        const y = b[idx];
        dot += x * y;
        normA += x * x;
        normB += y * y;
        idx += 1;
    }

    if (!normA || !normB) return 0;
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// L0 向量检索
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Find the StateAtoms most similar to a query vector for the active chat.
 *
 * Returns nothing when there is no active chat, no query vector, or the
 * stored engine fingerprint differs from the current one. Vectors whose atom
 * is no longer present are ignored. Results are filtered by
 * `CONFIG.MIN_SIMILARITY`, sorted by descending similarity, and capped at
 * `CONFIG.MAX_RESULTS`.
 *
 * @param {ArrayLike<number>} queryVector - Embedded query.
 * @param {object} vectorConfig - Vector engine configuration.
 * @returns {Promise<Array<{atomId: *, floor: *, similarity: number, atom: object}>>}
 */
export async function searchStateAtoms(queryVector, vectorConfig) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length) return [];

    // Fingerprint check: stored vectors must come from the current engine.
    const storedFp = (await getMeta(chatId)).fingerprint;
    const currentFp = getEngineFingerprint(vectorConfig);
    if (storedFp && storedFp !== currentFp) {
        xbLog.warn(MODULE_ID, 'fingerprint 不匹配,跳过 L0 召回');
        return [];
    }

    // Load the vectors.
    const stateVectors = await getAllStateVectors(chatId);
    if (!stateVectors.length) return [];

    // Index atoms by id so each vector can be joined with its atom fields.
    const atomsById = new Map();
    for (const atom of getStateAtoms()) {
        atomsById.set(atom.atomId, atom);
    }

    // Score every vector that still has a matching atom.
    const hits = [];
    for (const { atomId, floor, vector } of stateVectors) {
        const atom = atomsById.get(atomId);
        if (!atom) continue;

        const similarity = cosineSimilarity(queryVector, vector);
        if (similarity >= CONFIG.MIN_SIMILARITY) {
            hits.push({ atomId, floor, similarity, atom });
        }
    }

    hits.sort((left, right) => right.similarity - left.similarity);
    return hits.slice(0, CONFIG.MAX_RESULTS);
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Floor Bonus 构建
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Build the floor → bonus map from L0 recall results.
 *
 * Every floor that appears in the results gets the same flat `bonusFactor`,
 * recorded once; the similarity of the individual hits is deliberately not
 * taken into account.
 *
 * @param {Array<{floor: number}>} l0Results - L0 hits (may be null/undefined).
 * @param {number} [bonusFactor=0.10] - Bonus assigned to each hit floor.
 * @returns {Map<number, number>} Bonus keyed by floor.
 */
export function buildL0FloorBonus(l0Results, bonusFactor = 0.10) {
    const bonusByFloor = new Map();
    const hits = l0Results || [];

    for (const { floor } of hits) {
        // A floor is credited once no matter how many atoms hit it.
        if (bonusByFloor.has(floor)) continue;
        bonusByFloor.set(floor, bonusFactor);
    }

    return bonusByFloor;
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 虚拟 Chunk 转换
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Convert L0 recall results into chunk-shaped objects so they can be handled
 * together with real L1 chunks.
 *
 * @param {Array<{atomId: *, floor: *, similarity: number, atom: {semantic: string}}>} l0Results
 *   L0 hits (may be null/undefined).
 * @returns {Array<object>} One virtual chunk per hit, flagged with `isL0: true`.
 */
export function stateToVirtualChunks(l0Results) {
    const hits = l0Results || [];

    return hits.map(({ atomId, floor, similarity, atom }) => ({
        chunkId: `state-${atomId}`,
        floor,
        // Negative index; the original note says this is meant to order the
        // entry ahead of L1 chunks (no sort on chunkIdx is visible in this file).
        chunkIdx: -1,
        speaker: '📌', // Fixed marker
        isUser: false,
        text: atom.semantic,
        textHash: null,
        similarity,
        isL0: true, // Flags the chunk as virtual
        // Keep the source atom for callers that need its other fields.
        _atom: atom,
    }));
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 每楼层稀疏去重
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Merge L0 virtual chunks with L1 chunks, keeping at most `limit` per floor.
 *
 * All chunks are ranked by descending similarity; for each floor the first
 * `limit` chunks in that ranking are kept. The survivors are returned sorted
 * by descending similarity.
 *
 * @param {Array<object>} l0Chunks - Virtual chunks (may be null/undefined).
 * @param {Array<object>} l1Chunks - Real chunks (may be null/undefined).
 * @param {number} [limit=2] - Maximum chunks kept per floor.
 * @returns {Array<object>} Merged chunks.
 */
export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
    const bySimilarityDesc = (left, right) => right.similarity - left.similarity;

    // Rank everything together.
    const ranked = [...(l0Chunks || []), ...(l1Chunks || [])];
    ranked.sort(bySimilarityDesc);

    // Per-floor thinning: best-ranked chunks claim a floor's slots first.
    const keptPerFloor = new Map();
    for (const chunk of ranked) {
        const kept = keptPerFloor.get(chunk.floor) || [];
        if (!(kept.length < limit)) continue;

        kept.push(chunk);
        keptPerFloor.set(chunk.floor, kept);
    }

    // Flatten the floor buckets and restore the similarity ordering.
    return [...keptPerFloor.values()].flat().sort(bySimilarityDesc);
}
|
||||
187
modules/story-summary/vector/state-store.js
Normal file
187
modules/story-summary/vector/state-store.js
Normal file
@@ -0,0 +1,187 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Story Summary - State Store (L0)
|
||||
// StateAtom 存 chat_metadata(持久化)
|
||||
// StateVector 存 IndexedDB(可重建)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { saveMetadataDebounced } from '../../../../../../extensions.js';
|
||||
import { chat_metadata } from '../../../../../../../script.js';
|
||||
import { stateVectorsTable } from '../data/db.js';
|
||||
import { EXT_ID } from '../../../core/constants.js';
|
||||
import { xbLog } from '../../../core/debug-core.js';
|
||||
|
||||
const MODULE_ID = 'state-store';
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 工具函数
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Copy the bytes covered by a typed-array view into a standalone buffer.
 *
 * Only the window described by the view's `byteOffset` / `byteLength` is
 * copied, so a view over part of a larger buffer yields just that part.
 *
 * @param {Float32Array} arr - View whose bytes should be copied.
 * @returns {ArrayBuffer} A new buffer holding a copy of the view's bytes.
 */
export function float32ToBuffer(arr) {
    const { buffer, byteOffset, byteLength } = arr;
    const endOffset = byteOffset + byteLength;
    return buffer.slice(byteOffset, endOffset);
}
|
||||
|
||||
/**
 * Wrap a stored buffer in a Float32Array.
 *
 * When `buffer` is an ArrayBuffer the result is a view over that same memory
 * (no copy is made).
 *
 * @param {ArrayBuffer} buffer - Raw bytes of 32-bit floats.
 * @returns {Float32Array} Typed array built from `buffer`.
 */
export function bufferToFloat32(buffer) {
    const floats = new Float32Array(buffer);
    return floats;
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// StateAtom 操作(chat_metadata)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Return the StateAtom list stored at
 * `chat_metadata.extensions[EXT_ID].stateAtoms`, creating every missing
 * level of that path on the way.
 *
 * @returns {Array} The live array held in chat_metadata (not a copy).
 */
function ensureStateAtomsArray() {
    if (!chat_metadata.extensions) {
        chat_metadata.extensions = {};
    }
    const extensions = chat_metadata.extensions;

    if (!extensions[EXT_ID]) {
        extensions[EXT_ID] = {};
    }
    const ownSlot = extensions[EXT_ID];

    if (!ownSlot.stateAtoms) {
        ownSlot.stateAtoms = [];
    }
    return ownSlot.stateAtoms;
}
|
||||
|
||||
/**
 * Get every StateAtom of the current chat.
 *
 * @returns {Array} The live StateAtom array kept in chat_metadata.
 */
export function getStateAtoms() {
    const atoms = ensureStateAtomsArray();
    return atoms;
}
|
||||
|
||||
/**
 * Append new StateAtoms to the current chat's list, skipping invalid entries
 * and atoms whose `atomId` is already stored.
 *
 * An atom is accepted only when it has a truthy `atomId`, a finite
 * non-negative numeric `floor`, and a truthy `semantic` value. Rejected atoms
 * are reported through `xbLog.warn`. `saveMetadataDebounced()` is called only
 * when at least one atom was actually added.
 *
 * @param {Array<Object>} atoms - Candidate atoms; nullish or empty input is a no-op.
 * @returns {void}
 */
export function saveStateAtoms(atoms) {
    if (!atoms?.length) return;

    const arr = ensureStateAtomsArray();
    const existing = new Set(arr.map(a => a.atomId));

    let added = 0;
    for (const atom of atoms) {
        // Number.isFinite rejects non-numbers as well as NaN / ±Infinity.
        // A plain `typeof === 'number'` check lets NaN through, and a NaN
        // floor compares false against every bound (and would presumably be
        // written out as null if chat_metadata is persisted as JSON — confirm).
        const hasValidFloor = Number.isFinite(atom?.floor) && atom.floor >= 0;

        if (!atom?.atomId || !hasValidFloor || !atom.semantic) {
            xbLog.warn(MODULE_ID, `跳过无效 atom: ${atom?.atomId}`);
            continue;
        }

        // De-duplicate against stored atoms and against earlier items of
        // this same batch.
        if (existing.has(atom.atomId)) continue;

        arr.push(atom);
        existing.add(atom.atomId);
        added++;
    }

    if (added > 0) {
        saveMetadataDebounced();
        xbLog.info(MODULE_ID, `存储 ${added} 个 StateAtom`);
    }
}
|
||||
|
||||
/**
 * Drop every StateAtom located at `floor` or later.
 *
 * The stored list is replaced by a new array holding only the atoms whose
 * `floor` is strictly below the given one. `saveMetadataDebounced()` is
 * called only when something was removed.
 *
 * @param {number} floor - First floor to remove (inclusive).
 * @returns {number} How many atoms were removed.
 */
export function deleteStateAtomsFromFloor(floor) {
    const current = ensureStateAtomsArray();
    const isBeforeFloor = (atom) => atom.floor < floor;
    const kept = current.filter(isBeforeFloor);

    chat_metadata.extensions[EXT_ID].stateAtoms = kept;

    const removedCount = current.length - kept.length;
    if (removedCount <= 0) {
        return removedCount;
    }

    saveMetadataDebounced();
    xbLog.info(MODULE_ID, `删除 ${removedCount} 个 StateAtom (floor >= ${floor})`);
    return removedCount;
}
|
||||
|
||||
/**
 * Remove every StateAtom of the current chat.
 *
 * The stored list is replaced by a fresh empty array.
 * `saveMetadataDebounced()` is called only when the list was not already
 * empty.
 *
 * @returns {void}
 */
export function clearStateAtoms() {
    const previousCount = ensureStateAtomsArray().length;

    chat_metadata.extensions[EXT_ID].stateAtoms = [];

    if (previousCount === 0) {
        return;
    }

    saveMetadataDebounced();
    xbLog.info(MODULE_ID, `清空 ${previousCount} 个 StateAtom`);
}
|
||||
|
||||
/**
 * Count the StateAtoms stored for the current chat.
 *
 * @returns {number} Length of the stored StateAtom list.
 */
export function getStateAtomsCount() {
    const { length } = ensureStateAtomsArray();
    return length;
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// StateVector 操作(IndexedDB)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Persist StateVectors for a chat via `stateVectorsTable.bulkPut`.
 *
 * Each item's vector is converted to a Float32Array and stored as a raw
 * buffer, together with its length (`dims`) and the given fingerprint.
 * Does nothing when `chatId` is falsy or `items` is empty/nullish.
 *
 * @param {string} chatId - Chat the vectors belong to.
 * @param {Array<{atomId: *, floor: number, vector: ArrayLike<number>}>} items - Vectors to store.
 * @param {*} fingerprint - Value stored alongside every record.
 * @returns {Promise<void>}
 */
export async function saveStateVectors(chatId, items, fingerprint) {
    if (!chatId) return;
    if (!items?.length) return;

    const toRecord = ({ atomId, floor, vector }) => {
        const packed = float32ToBuffer(new Float32Array(vector));
        return {
            chatId,
            atomId,
            floor,
            vector: packed,
            dims: vector.length,
            fingerprint,
        };
    };
    const records = items.map(toRecord);

    await stateVectorsTable.bulkPut(records);
    xbLog.info(MODULE_ID, `存储 ${records.length} 个 StateVector`);
}
|
||||
|
||||
/**
 * Load every StateVector stored for a chat.
 *
 * Records are returned with their stored `vector` buffer converted back to
 * a Float32Array; all other fields are passed through unchanged.
 *
 * @param {string} chatId - Chat to load vectors for.
 * @returns {Promise<Array<Object>>} Stored records, or an empty array when `chatId` is falsy.
 */
export async function getAllStateVectors(chatId) {
    if (!chatId) {
        return [];
    }

    const rows = await stateVectorsTable.where('chatId').equals(chatId).toArray();
    const withTypedVector = (row) => ({
        ...row,
        vector: bufferToFloat32(row.vector),
    });
    return rows.map(withTypedVector);
}
|
||||
|
||||
/**
 * Delete the StateVectors of a chat that sit at `floor` or later.
 *
 * Does nothing when `chatId` is falsy. Logs the number of deleted records
 * when at least one was removed.
 *
 * @param {string} chatId - Chat whose vectors should be pruned.
 * @param {number} floor - First floor to remove (inclusive).
 * @returns {Promise<void>}
 */
export async function deleteStateVectorsFromFloor(chatId, floor) {
    if (!chatId) {
        return;
    }

    const isAtOrAfterFloor = (record) => record.floor >= floor;
    const matching = stateVectorsTable
        .where('chatId')
        .equals(chatId)
        .filter(isAtOrAfterFloor);
    const removedCount = await matching.delete();

    if (!(removedCount > 0)) {
        return;
    }
    xbLog.info(MODULE_ID, `删除 ${removedCount} 个 StateVector (floor >= ${floor})`);
}
|
||||
|
||||
/**
 * Delete every StateVector stored for a chat.
 *
 * Does nothing when `chatId` is falsy. Logs the number of deleted records
 * when at least one was removed.
 *
 * @param {string} chatId - Chat whose vectors should be removed.
 * @returns {Promise<void>}
 */
export async function clearStateVectors(chatId) {
    if (!chatId) {
        return;
    }

    const forChat = stateVectorsTable.where('chatId').equals(chatId);
    const removedCount = await forChat.delete();

    if (!(removedCount > 0)) {
        return;
    }
    xbLog.info(MODULE_ID, `清空 ${removedCount} 个 StateVector`);
}
|
||||
|
||||
/**
 * Count the StateVectors stored for a chat.
 *
 * @param {string} chatId - Chat to count vectors for.
 * @returns {Promise<number>} Number of stored records, or 0 when `chatId` is falsy.
 */
export async function getStateVectorsCount(chatId) {
    if (!chatId) {
        return 0;
    }

    const forChat = stateVectorsTable.where('chatId').equals(chatId);
    const total = await forChat.count();
    return total;
}
|
||||
@@ -18,6 +18,14 @@ import {
|
||||
clearEventVectors,
|
||||
saveEventVectors,
|
||||
} from './chunk-store.js';
|
||||
import {
|
||||
getStateAtoms,
|
||||
saveStateAtoms,
|
||||
clearStateAtoms,
|
||||
getAllStateVectors,
|
||||
saveStateVectors,
|
||||
clearStateVectors,
|
||||
} from './state-store.js';
|
||||
import { getEngineFingerprint } from './embedder.js';
|
||||
import { getVectorConfig } from '../data/config.js';
|
||||
|
||||
@@ -81,13 +89,18 @@ export async function exportVectors(onProgress) {
|
||||
const chunks = await getAllChunks(chatId);
|
||||
const chunkVectors = await getAllChunkVectors(chatId);
|
||||
const eventVectors = await getAllEventVectors(chatId);
|
||||
const stateAtoms = getStateAtoms();
|
||||
const stateVectors = await getAllStateVectors(chatId);
|
||||
|
||||
if (chunks.length === 0 && eventVectors.length === 0) {
|
||||
if (chunkVectors.length === 0 && eventVectors.length === 0 && stateVectors.length === 0) {
|
||||
throw new Error('没有可导出的向量数据');
|
||||
}
|
||||
|
||||
// 确定维度
|
||||
const dims = chunkVectors[0]?.vector?.length || eventVectors[0]?.vector?.length || 0;
|
||||
const dims = chunkVectors[0]?.vector?.length
|
||||
|| eventVectors[0]?.vector?.length
|
||||
|| stateVectors[0]?.vector?.length
|
||||
|| 0;
|
||||
if (dims === 0) {
|
||||
throw new Error('无法确定向量维度');
|
||||
}
|
||||
@@ -123,6 +136,14 @@ export async function exportVectors(onProgress) {
|
||||
// event_vectors.bin
|
||||
const eventVectorsOrdered = sortedEventVectors.map(ev => ev.vector);
|
||||
|
||||
// state vectors
|
||||
const sortedStateVectors = [...stateVectors].sort((a, b) => String(a.atomId).localeCompare(String(b.atomId)));
|
||||
const stateVectorsOrdered = sortedStateVectors.map(v => v.vector);
|
||||
const stateVectorsJsonl = sortedStateVectors.map(v => JSON.stringify({
|
||||
atomId: v.atomId,
|
||||
floor: v.floor,
|
||||
})).join('\n');
|
||||
|
||||
// manifest
|
||||
const manifest = {
|
||||
version: EXPORT_VERSION,
|
||||
@@ -133,6 +154,8 @@ export async function exportVectors(onProgress) {
|
||||
chunkCount: sortedChunks.length,
|
||||
chunkVectorCount: chunkVectors.length,
|
||||
eventCount: sortedEventVectors.length,
|
||||
stateAtomCount: stateAtoms.length,
|
||||
stateVectorCount: stateVectors.length,
|
||||
lastChunkFloor: meta.lastChunkFloor ?? -1,
|
||||
};
|
||||
|
||||
@@ -145,6 +168,11 @@ export async function exportVectors(onProgress) {
|
||||
'chunk_vectors.bin': float32ToBytes(chunkVectorsOrdered, dims),
|
||||
'events.jsonl': strToU8(eventsJsonl),
|
||||
'event_vectors.bin': float32ToBytes(eventVectorsOrdered, dims),
|
||||
'state_atoms.json': strToU8(JSON.stringify(stateAtoms)),
|
||||
'state_vectors.jsonl': strToU8(stateVectorsJsonl),
|
||||
'state_vectors.bin': stateVectorsOrdered.length
|
||||
? float32ToBytes(stateVectorsOrdered, dims)
|
||||
: new Uint8Array(0),
|
||||
}, { level: 1 }); // 降低压缩级别,速度优先
|
||||
|
||||
onProgress?.('下载文件...');
|
||||
@@ -238,6 +266,21 @@ export async function importVectors(file, onProgress) {
|
||||
const eventVectorsBytes = unzipped['event_vectors.bin'];
|
||||
const eventVectors = eventVectorsBytes ? bytesToFloat32(eventVectorsBytes, manifest.dims) : [];
|
||||
|
||||
// 解析 L0 state atoms
|
||||
const stateAtoms = unzipped['state_atoms.json']
|
||||
? JSON.parse(strFromU8(unzipped['state_atoms.json']))
|
||||
: [];
|
||||
|
||||
// 解析 L0 state vectors metas
|
||||
const stateVectorsJsonl = unzipped['state_vectors.jsonl'] ? strFromU8(unzipped['state_vectors.jsonl']) : '';
|
||||
const stateVectorMetas = stateVectorsJsonl.split('\n').filter(Boolean).map(line => JSON.parse(line));
|
||||
|
||||
// 解析 L0 state vectors
|
||||
const stateVectorsBytes = unzipped['state_vectors.bin'];
|
||||
const stateVectors = (stateVectorsBytes && stateVectorMetas.length)
|
||||
? bytesToFloat32(stateVectorsBytes, manifest.dims)
|
||||
: [];
|
||||
|
||||
// 校验数量
|
||||
if (chunkMetas.length !== chunkVectors.length) {
|
||||
throw new Error(`chunk 数量不匹配: 元数据 ${chunkMetas.length}, 向量 ${chunkVectors.length}`);
|
||||
@@ -245,12 +288,17 @@ export async function importVectors(file, onProgress) {
|
||||
if (eventMetas.length !== eventVectors.length) {
|
||||
throw new Error(`event 数量不匹配: 元数据 ${eventMetas.length}, 向量 ${eventVectors.length}`);
|
||||
}
|
||||
if (stateVectorMetas.length !== stateVectors.length) {
|
||||
throw new Error(`state 向量数量不匹配: 元数据 ${stateVectorMetas.length}, 向量 ${stateVectors.length}`);
|
||||
}
|
||||
|
||||
onProgress?.('清空旧数据...');
|
||||
|
||||
// 清空当前数据
|
||||
await clearAllChunks(chatId);
|
||||
await clearEventVectors(chatId);
|
||||
await clearStateVectors(chatId);
|
||||
clearStateAtoms();
|
||||
|
||||
onProgress?.('写入数据...');
|
||||
|
||||
@@ -284,13 +332,28 @@ export async function importVectors(file, onProgress) {
|
||||
await saveEventVectors(chatId, eventVectorItems, manifest.fingerprint);
|
||||
}
|
||||
|
||||
// 写入 state atoms
|
||||
if (stateAtoms.length > 0) {
|
||||
saveStateAtoms(stateAtoms);
|
||||
}
|
||||
|
||||
// 写入 state vectors
|
||||
if (stateVectorMetas.length > 0) {
|
||||
const stateVectorItems = stateVectorMetas.map((meta, idx) => ({
|
||||
atomId: meta.atomId,
|
||||
floor: meta.floor,
|
||||
vector: stateVectors[idx],
|
||||
}));
|
||||
await saveStateVectors(chatId, stateVectorItems, manifest.fingerprint);
|
||||
}
|
||||
|
||||
// 更新 meta
|
||||
await updateMeta(chatId, {
|
||||
fingerprint: manifest.fingerprint,
|
||||
lastChunkFloor: manifest.lastChunkFloor,
|
||||
});
|
||||
|
||||
xbLog.info(MODULE_ID, `导入完成: ${chunkMetas.length} chunks, ${eventMetas.length} events`);
|
||||
xbLog.info(MODULE_ID, `导入完成: ${chunkMetas.length} chunks, ${eventMetas.length} events, ${stateAtoms.length} state atoms`);
|
||||
|
||||
return {
|
||||
chunkCount: chunkMetas.length,
|
||||
|
||||
Reference in New Issue
Block a user