Sync local version

2026-01-26 01:16:35 +08:00
parent 3ad32da21a
commit c1202c2ca2
27 changed files with 16595 additions and 2369 deletions

View File

@@ -0,0 +1,360 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Chunk Builder
// Standard RAG chunking: ~200 tokens per chunk
// ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../extensions.js';
import {
getMeta,
updateMeta,
saveChunks,
saveChunkVectors,
clearAllChunks,
deleteChunksFromFloor,
deleteChunksAtFloor,
makeChunkId,
hashText,
CHUNK_MAX_TOKENS,
} from './chunk-store.js';
import { embed, getEngineFingerprint } from './embedder.js';
import { xbLog } from '../../../core/debug-core.js';
const MODULE_ID = 'chunk-builder';
// ═══════════════════════════════════════════════════════════════════════════
// Token estimation
// ═══════════════════════════════════════════════════════════════════════════
function estimateTokens(text) {
if (!text) return 0;
const chinese = (text.match(/[\u4e00-\u9fff]/g) || []).length;
const other = text.length - chinese;
return Math.ceil(chinese + other / 4);
}
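// Worked example (illustrative): CJK characters count as one token each and everything else
// at roughly 4 characters per token, so estimateTokens('你好,world') === Math.ceil(2 + 6 / 4) === 4.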
function splitSentences(text) {
if (!text) return [];
const parts = text.split(/(?<=[。!?\n])|(?<=[.!?]\s)/);
return parts.map(s => s.trim()).filter(s => s.length > 0);
}
// ═══════════════════════════════════════════════════════════════════════════
// Chunk splitting
// ═══════════════════════════════════════════════════════════════════════════
export function chunkMessage(floor, message, maxTokens = CHUNK_MAX_TOKENS) {
const text = message.mes || '';
const speaker = message.name || (message.is_user ? '用户' : '角色');
const isUser = !!message.is_user;
const cleanText = text
.replace(/<think>[\s\S]*?<\/think>/gi, '')
.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '')
.replace(/\[tts:[^\]]*\]/gi, '')
.trim();
if (!cleanText) return [];
const totalTokens = estimateTokens(cleanText);
if (totalTokens <= maxTokens) {
return [{
chunkId: makeChunkId(floor, 0),
floor,
chunkIdx: 0,
speaker,
isUser,
text: cleanText,
textHash: hashText(cleanText),
}];
}
const sentences = splitSentences(cleanText);
const chunks = [];
let currentSentences = [];
let currentTokens = 0;
for (const sent of sentences) {
const sentTokens = estimateTokens(sent);
if (sentTokens > maxTokens) {
if (currentSentences.length > 0) {
const chunkText = currentSentences.join('');
chunks.push({
chunkId: makeChunkId(floor, chunks.length),
floor,
chunkIdx: chunks.length,
speaker,
isUser,
text: chunkText,
textHash: hashText(chunkText),
});
currentSentences = [];
currentTokens = 0;
}
const sliceSize = maxTokens * 2;
for (let i = 0; i < sent.length; i += sliceSize) {
const slice = sent.slice(i, i + sliceSize);
chunks.push({
chunkId: makeChunkId(floor, chunks.length),
floor,
chunkIdx: chunks.length,
speaker,
isUser,
text: slice,
textHash: hashText(slice),
});
}
continue;
}
if (currentTokens + sentTokens > maxTokens && currentSentences.length > 0) {
const chunkText = currentSentences.join('');
chunks.push({
chunkId: makeChunkId(floor, chunks.length),
floor,
chunkIdx: chunks.length,
speaker,
isUser,
text: chunkText,
textHash: hashText(chunkText),
});
currentSentences = [];
currentTokens = 0;
}
currentSentences.push(sent);
currentTokens += sentTokens;
}
if (currentSentences.length > 0) {
const chunkText = currentSentences.join('');
chunks.push({
chunkId: makeChunkId(floor, chunks.length),
floor,
chunkIdx: chunks.length,
speaker,
isUser,
text: chunkText,
textHash: hashText(chunkText),
});
}
return chunks;
}
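// Usage sketch (illustrative; the message shape mirrors the chat entries consumed above, and the
// literal text is made up):
//   const chunks = chunkMessage(12, { name: 'Alice', is_user: false, mes: '很长的回复……' });
//   // -> [{ chunkId: 'c-12-0', floor: 12, chunkIdx: 0, speaker: 'Alice', isUser: false, text, textHash }, ...]
// Short messages yield a single chunk; longer ones are split on sentence boundaries so each chunk
// stays within CHUNK_MAX_TOKENS, and a single oversized sentence is hard-sliced by character count.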
// ═══════════════════════════════════════════════════════════════════════════
// Build status
// ═══════════════════════════════════════════════════════════════════════════
export async function getChunkBuildStatus() {
const { chat, chatId } = getContext();
if (!chatId) {
return { totalFloors: 0, builtFloors: 0, pending: 0 };
}
const meta = await getMeta(chatId);
const totalFloors = chat?.length || 0;
const builtFloors = meta.lastChunkFloor + 1;
return {
totalFloors,
builtFloors,
lastChunkFloor: meta.lastChunkFloor,
pending: Math.max(0, totalFloors - builtFloors),
};
}
// ═══════════════════════════════════════════════════════════════════════════
// Full build
// ═══════════════════════════════════════════════════════════════════════════
export async function buildAllChunks(options = {}) {
const { onProgress, shouldCancel, vectorConfig } = options;
const { chat, chatId } = getContext();
if (!chatId || !chat?.length) {
return { built: 0, errors: 0 };
}
const fingerprint = getEngineFingerprint(vectorConfig);
await clearAllChunks(chatId);
await updateMeta(chatId, { lastChunkFloor: -1, fingerprint });
const allChunks = [];
for (let floor = 0; floor < chat.length; floor++) {
const chunks = chunkMessage(floor, chat[floor]);
allChunks.push(...chunks);
}
if (allChunks.length === 0) {
return { built: 0, errors: 0 };
}
xbLog.info(MODULE_ID, `开始构建 ${allChunks.length} 个 chunks(${chat.length} 层楼)`);
await saveChunks(chatId, allChunks);
const texts = allChunks.map(c => c.text);
const isLocal = vectorConfig.engine === 'local';
const batchSize = isLocal ? 5 : 20;
let completed = 0;
let errors = 0;
const allVectors = [];
for (let i = 0; i < texts.length; i += batchSize) {
if (shouldCancel?.()) break;
const batch = texts.slice(i, i + batchSize);
try {
const vectors = await embed(batch, vectorConfig);
allVectors.push(...vectors);
completed += batch.length;
onProgress?.(completed, texts.length);
} catch (e) {
xbLog.error(MODULE_ID, `批次 ${i}/${texts.length} 向量化失败`, e);
allVectors.push(...batch.map(() => null));
errors++;
}
}
if (shouldCancel?.()) {
return { built: completed, errors };
}
const vectorItems = allChunks
.map((chunk, idx) => allVectors[idx] ? { chunkId: chunk.chunkId, vector: allVectors[idx] } : null)
.filter(Boolean);
if (vectorItems.length > 0) {
await saveChunkVectors(chatId, vectorItems, fingerprint);
}
await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });
xbLog.info(MODULE_ID, `构建完成:${vectorItems.length} 个向量,${errors} 个错误`);
return { built: vectorItems.length, errors };
}
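// Usage sketch (illustrative; the vectorConfig shape follows the embed() contract in embedder.js,
// and the concrete key/model values are assumptions):
//   const { built, errors } = await buildAllChunks({
//     vectorConfig: { engine: 'online', online: { provider: 'siliconflow', key, model: 'BAAI/bge-m3' } },
//     onProgress: (done, total) => console.log(`embedding ${done}/${total}`),
//     shouldCancel: () => false,
//   });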
// ═══════════════════════════════════════════════════════════════════════════
// Incremental build
// ═══════════════════════════════════════════════════════════════════════════
export async function buildIncrementalChunks(options = {}) {
const { vectorConfig } = options;
const { chat, chatId } = getContext();
if (!chatId || !chat?.length) {
return { built: 0 };
}
const meta = await getMeta(chatId);
const fingerprint = getEngineFingerprint(vectorConfig);
if (meta.fingerprint && meta.fingerprint !== fingerprint) {
xbLog.warn(MODULE_ID, '引擎指纹不匹配,跳过增量构建');
return { built: 0 };
}
const startFloor = meta.lastChunkFloor + 1;
if (startFloor >= chat.length) {
return { built: 0 };
}
xbLog.info(MODULE_ID, `增量构建 ${startFloor} - ${chat.length - 1}`);
const newChunks = [];
for (let floor = startFloor; floor < chat.length; floor++) {
const chunks = chunkMessage(floor, chat[floor]);
newChunks.push(...chunks);
}
if (newChunks.length === 0) {
await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });
return { built: 0 };
}
await saveChunks(chatId, newChunks);
const texts = newChunks.map(c => c.text);
try {
const vectors = await embed(texts, vectorConfig);
const vectorItems = newChunks.map((chunk, idx) => ({
chunkId: chunk.chunkId,
vector: vectors[idx],
}));
await saveChunkVectors(chatId, vectorItems, fingerprint);
await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });
return { built: vectorItems.length };
} catch (e) {
xbLog.error(MODULE_ID, '增量向量化失败', e);
return { built: 0 };
}
}
// ═══════════════════════════════════════════════════════════════════════════
// L1 sync (called when messages change)
// ═══════════════════════════════════════════════════════════════════════════
/**
* Sync after message deletion: delete chunks with floor >= newLength
*/
export async function syncOnMessageDeleted(chatId, newLength) {
if (!chatId || newLength < 0) return;
await deleteChunksFromFloor(chatId, newLength);
await updateMeta(chatId, { lastChunkFloor: newLength - 1 });
xbLog.info(MODULE_ID, `消息删除同步:删除 floor >= ${newLength}`);
}
/**
* Sync after swipe: delete the chunks at the last floor; they will be rebuilt later
*/
export async function syncOnMessageSwiped(chatId, lastFloor) {
if (!chatId || lastFloor < 0) return;
await deleteChunksAtFloor(chatId, lastFloor);
await updateMeta(chatId, { lastChunkFloor: lastFloor - 1 });
xbLog.info(MODULE_ID, `swipe 同步:删除 floor ${lastFloor}`);
}
/**
* Sync after a new message: delete and rebuild the last floor
*/
export async function syncOnMessageReceived(chatId, lastFloor, message, vectorConfig) {
if (!chatId || lastFloor < 0 || !message) return;
if (!vectorConfig?.enabled) return;
// Delete the old chunks for this floor
await deleteChunksAtFloor(chatId, lastFloor);
// Rebuild
const chunks = chunkMessage(lastFloor, message);
if (chunks.length === 0) return;
await saveChunks(chatId, chunks);
// Vectorize
const fingerprint = getEngineFingerprint(vectorConfig);
const texts = chunks.map(c => c.text);
try {
const vectors = await embed(texts, vectorConfig);
const items = chunks.map((c, i) => ({ chunkId: c.chunkId, vector: vectors[i] }));
await saveChunkVectors(chatId, items, fingerprint);
await updateMeta(chatId, { lastChunkFloor: lastFloor });
xbLog.info(MODULE_ID, `消息同步:重建 floor ${lastFloor},${chunks.length} 个 chunk`);
} catch (e) {
xbLog.error(MODULE_ID, `消息同步失败:floor ${lastFloor}`, e);
}
}

View File

@@ -0,0 +1,247 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Chunk Store (L1/L2 storage)
// ═══════════════════════════════════════════════════════════════════════════
import {
metaTable,
chunksTable,
chunkVectorsTable,
eventVectorsTable,
CHUNK_MAX_TOKENS,
} from '../data/db.js';
// ═══════════════════════════════════════════════════════════════════════════
// Utility functions
// ═══════════════════════════════════════════════════════════════════════════
export function float32ToBuffer(arr) {
return arr.buffer.slice(arr.byteOffset, arr.byteOffset + arr.byteLength);
}
export function bufferToFloat32(buffer) {
return new Float32Array(buffer);
}
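// Round-trip sketch (illustrative): vectors are stored in IndexedDB as ArrayBuffers and rehydrated on read.
//   const buf = float32ToBuffer(new Float32Array([0.1, 0.2, 0.3]));
//   const vec = bufferToFloat32(buf); // Float32Array(3) [0.1, 0.2, 0.3] (within float32 precision)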
export function makeChunkId(floor, chunkIdx) {
return `c-${floor}-${chunkIdx}`;
}
export function hashText(text) {
let hash = 0;
for (let i = 0; i < text.length; i++) {
hash = ((hash << 5) - hash + text.charCodeAt(i)) | 0;
}
return hash.toString(36);
}
// ═══════════════════════════════════════════════════════════════════════════
// Meta table operations
// ═══════════════════════════════════════════════════════════════════════════
export async function getMeta(chatId) {
let meta = await metaTable.get(chatId);
if (!meta) {
meta = {
chatId,
fingerprint: null,
lastChunkFloor: -1,
updatedAt: Date.now(),
};
await metaTable.put(meta);
}
return meta;
}
export async function updateMeta(chatId, updates) {
await metaTable.update(chatId, {
...updates,
updatedAt: Date.now(),
});
}
// ═══════════════════════════════════════════════════════════════════════════
// Chunks table operations
// ═══════════════════════════════════════════════════════════════════════════
export async function saveChunks(chatId, chunks) {
const records = chunks.map(chunk => ({
chatId,
chunkId: chunk.chunkId,
floor: chunk.floor,
chunkIdx: chunk.chunkIdx,
speaker: chunk.speaker,
isUser: chunk.isUser,
text: chunk.text,
textHash: chunk.textHash,
createdAt: Date.now(),
}));
await chunksTable.bulkPut(records);
}
export async function getAllChunks(chatId) {
return await chunksTable.where('chatId').equals(chatId).toArray();
}
export async function getChunksByFloors(chatId, floors) {
const chunks = await chunksTable
.where('[chatId+floor]')
.anyOf(floors.map(f => [chatId, f]))
.toArray();
return chunks;
}
/**
* Delete all chunks and vectors at and after the given floor
*/
export async function deleteChunksFromFloor(chatId, fromFloor) {
const chunks = await chunksTable
.where('chatId')
.equals(chatId)
.filter(c => c.floor >= fromFloor)
.toArray();
const chunkIds = chunks.map(c => c.chunkId);
await chunksTable
.where('chatId')
.equals(chatId)
.filter(c => c.floor >= fromFloor)
.delete();
for (const chunkId of chunkIds) {
await chunkVectorsTable.delete([chatId, chunkId]);
}
}
/**
* Delete the chunks and vectors at the given floor
*/
export async function deleteChunksAtFloor(chatId, floor) {
const chunks = await chunksTable
.where('[chatId+floor]')
.equals([chatId, floor])
.toArray();
const chunkIds = chunks.map(c => c.chunkId);
await chunksTable.where('[chatId+floor]').equals([chatId, floor]).delete();
for (const chunkId of chunkIds) {
await chunkVectorsTable.delete([chatId, chunkId]);
}
}
export async function clearAllChunks(chatId) {
await chunksTable.where('chatId').equals(chatId).delete();
await chunkVectorsTable.where('chatId').equals(chatId).delete();
}
// ═══════════════════════════════════════════════════════════════════════════
// ChunkVectors table operations
// ═══════════════════════════════════════════════════════════════════════════
export async function saveChunkVectors(chatId, items, fingerprint) {
const records = items.map(item => ({
chatId,
chunkId: item.chunkId,
vector: float32ToBuffer(new Float32Array(item.vector)),
dims: item.vector.length,
fingerprint,
}));
await chunkVectorsTable.bulkPut(records);
}
export async function getAllChunkVectors(chatId) {
const records = await chunkVectorsTable.where('chatId').equals(chatId).toArray();
return records.map(r => ({
...r,
vector: bufferToFloat32(r.vector),
}));
}
// ═══════════════════════════════════════════════════════════════════════════
// EventVectors table operations
// ═══════════════════════════════════════════════════════════════════════════
export async function saveEventVectors(chatId, items, fingerprint) {
const records = items.map(item => ({
chatId,
eventId: item.eventId,
vector: float32ToBuffer(new Float32Array(item.vector)),
dims: item.vector.length,
fingerprint,
}));
await eventVectorsTable.bulkPut(records);
}
export async function getAllEventVectors(chatId) {
const records = await eventVectorsTable.where('chatId').equals(chatId).toArray();
return records.map(r => ({
...r,
vector: bufferToFloat32(r.vector),
}));
}
export async function clearEventVectors(chatId) {
await eventVectorsTable.where('chatId').equals(chatId).delete();
}
/**
* Delete event vectors by a list of IDs
*/
export async function deleteEventVectorsByIds(chatId, eventIds) {
for (const eventId of eventIds) {
await eventVectorsTable.delete([chatId, eventId]);
}
}
// ═══════════════════════════════════════════════════════════════════════════
// Stats and utilities
// ═══════════════════════════════════════════════════════════════════════════
export async function getStorageStats(chatId) {
const [meta, chunkCount, chunkVectorCount, eventCount] = await Promise.all([
getMeta(chatId),
chunksTable.where('chatId').equals(chatId).count(),
chunkVectorsTable.where('chatId').equals(chatId).count(),
eventVectorsTable.where('chatId').equals(chatId).count(),
]);
return {
fingerprint: meta.fingerprint,
lastChunkFloor: meta.lastChunkFloor,
chunks: chunkCount,
chunkVectors: chunkVectorCount,
eventVectors: eventCount,
};
}
export async function clearChatData(chatId) {
await Promise.all([
metaTable.delete(chatId),
chunksTable.where('chatId').equals(chatId).delete(),
chunkVectorsTable.where('chatId').equals(chatId).delete(),
eventVectorsTable.where('chatId').equals(chatId).delete(),
]);
}
export async function ensureFingerprintMatch(chatId, newFingerprint) {
const meta = await getMeta(chatId);
if (meta.fingerprint && meta.fingerprint !== newFingerprint) {
await Promise.all([
chunkVectorsTable.where('chatId').equals(chatId).delete(),
eventVectorsTable.where('chatId').equals(chatId).delete(),
]);
await updateMeta(chatId, {
fingerprint: newFingerprint,
lastChunkFloor: -1,
});
return false;
}
if (!meta.fingerprint) {
await updateMeta(chatId, { fingerprint: newFingerprint });
}
return true;
}
export { CHUNK_MAX_TOKENS };

View File

@@ -0,0 +1,624 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Embedding Service
// Unified embedding interface (local model / online service)
// ═══════════════════════════════════════════════════════════════════════════
import { xbLog } from '../../../core/debug-core.js';
const MODULE_ID = 'embedding';
// ═══════════════════════════════════════════════════════════════════════════
// Local model configuration
// ═══════════════════════════════════════════════════════════════════════════
export const LOCAL_MODELS = {
'bge-small-zh': {
id: 'bge-small-zh',
name: '中文轻量 (51MB)',
hfId: 'Xenova/bge-small-zh-v1.5',
dims: 512,
desc: '手机/低配适用',
},
'bge-base-zh': {
id: 'bge-base-zh',
name: '中文标准 (102MB)',
hfId: 'Xenova/bge-base-zh-v1.5',
dims: 768,
desc: 'PC 推荐,效果更好',
},
'e5-small': {
id: 'e5-small',
name: '多语言 (118MB)',
hfId: 'Xenova/multilingual-e5-small',
dims: 384,
desc: '非中文用户',
},
};
export const DEFAULT_LOCAL_MODEL = 'bge-small-zh';
// ═══════════════════════════════════════════════════════════════════════════
// Online provider configuration
// ═══════════════════════════════════════════════════════════════════════════
export const ONLINE_PROVIDERS = {
siliconflow: {
id: 'siliconflow',
name: '硅基流动',
baseUrl: 'https://api.siliconflow.cn',
canFetchModels: false,
defaultModels: [
'BAAI/bge-m3',
'BAAI/bge-large-zh-v1.5',
'BAAI/bge-small-zh-v1.5',
],
},
cohere: {
id: 'cohere',
name: 'Cohere',
baseUrl: 'https://api.cohere.ai',
canFetchModels: false,
defaultModels: [
'embed-multilingual-v3.0',
'embed-english-v3.0',
],
// Cohere uses a different API format
customEmbed: true,
},
openai: {
id: 'openai',
name: 'OpenAI 兼容',
baseUrl: '',
canFetchModels: true,
defaultModels: [],
},
};
// ═══════════════════════════════════════════════════════════════════════════
// Local model state management
// ═══════════════════════════════════════════════════════════════════════════
// Loaded model instances: { modelId: pipeline }
const loadedPipelines = {};
// Model currently being downloaded
let downloadingModelId = null;
let downloadAbortController = null;
// Worker for local embedding
let embeddingWorker = null;
let workerRequestId = 0;
const workerCallbacks = new Map();
function getWorker() {
if (!embeddingWorker) {
const workerPath = new URL('./embedder.worker.js', import.meta.url).href;
embeddingWorker = new Worker(workerPath, { type: 'module' });
embeddingWorker.onmessage = (e) => {
const { requestId, ...data } = e.data || {};
const callback = workerCallbacks.get(requestId);
if (callback) {
callback(data);
if (data.type === 'result' || data.type === 'error' || data.type === 'loaded') {
workerCallbacks.delete(requestId);
}
}
};
}
return embeddingWorker;
}
function workerRequest(message) {
return new Promise((resolve, reject) => {
const requestId = ++workerRequestId;
const worker = getWorker();
workerCallbacks.set(requestId, (data) => {
if (data.type === 'error') {
reject(new Error(data.error));
} else if (data.type === 'result') {
resolve(data.vectors);
} else if (data.type === 'loaded') {
resolve(true);
}
});
worker.postMessage({ ...message, requestId });
});
}
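// Message protocol with embedder.worker.js, as used by workerRequest/downloadLocalModel:
//   main -> worker:  { type: 'load' | 'embed' | 'check', requestId, ... }
//   worker -> main:  { type: 'status' | 'progress' | 'embed_progress' | 'loaded' | 'result' | 'error', requestId, ... }
// workerRequest resolves on 'result'/'loaded' and rejects on 'error'; 'progress' messages are
// handled separately by downloadLocalModel for its progress callback.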
// ═══════════════════════════════════════════════════════════════════════════
// Local model operations
// ═══════════════════════════════════════════════════════════════════════════
/**
* Check the status of the given local model.
* Reads only the cache and never triggers a download.
*/
export async function checkLocalModelStatus(modelId = DEFAULT_LOCAL_MODEL) {
const modelConfig = LOCAL_MODELS[modelId];
if (!modelConfig) {
return { status: 'error', message: '未知模型' };
}
// Already loaded in memory
if (loadedPipelines[modelId]) {
return { status: 'ready', message: '已就绪' };
}
// Currently downloading
if (downloadingModelId === modelId) {
return { status: 'downloading', message: '下载中' };
}
// Check the IndexedDB cache
const hasCache = await checkModelCache(modelConfig.hfId);
if (hasCache) {
return { status: 'cached', message: '已缓存,可加载' };
}
return { status: 'not_downloaded', message: '未下载' };
}
/**
* Check whether the model is cached in IndexedDB
*/
async function checkModelCache(hfId) {
return new Promise((resolve) => {
try {
const request = indexedDB.open('transformers-cache', 1);
request.onerror = () => resolve(false);
request.onsuccess = (event) => {
const db = event.target.result;
const storeNames = Array.from(db.objectStoreNames);
db.close();
// Check whether a cache entry for this model exists
const modelKey = hfId.replace('/', '_');
const hasModel = storeNames.some(name =>
name.includes(modelKey) || name.includes('onnx')
);
resolve(hasModel);
};
request.onupgradeneeded = () => resolve(false);
} catch {
resolve(false);
}
});
}
/**
* Download/load a local model
* @param {string} modelId - model ID
* @param {Function} onProgress - progress callback (0-100)
* @returns {Promise<boolean>}
*/
export async function downloadLocalModel(modelId = DEFAULT_LOCAL_MODEL, onProgress) {
const modelConfig = LOCAL_MODELS[modelId];
if (!modelConfig) {
throw new Error(`未知模型: ${modelId}`);
}
// Already loaded
if (loadedPipelines[modelId]) {
onProgress?.(100);
return true;
}
// A different model is currently downloading
if (downloadingModelId && downloadingModelId !== modelId) {
throw new Error(`正在下载其他模型: ${downloadingModelId}`);
}
// The same model is already downloading; wait for it to finish
if (downloadingModelId === modelId) {
xbLog.info(MODULE_ID, `模型 ${modelId} 正在加载中...`);
return new Promise((resolve, reject) => {
const check = () => {
if (loadedPipelines[modelId]) {
resolve(true);
} else if (downloadingModelId !== modelId) {
reject(new Error('下载已取消'));
} else {
setTimeout(check, 200);
}
};
check();
});
}
downloadingModelId = modelId;
downloadAbortController = new AbortController();
try {
xbLog.info(MODULE_ID, `开始下载模型: ${modelId}`);
return await new Promise((resolve, reject) => {
const requestId = ++workerRequestId;
const worker = getWorker();
workerCallbacks.set(requestId, (data) => {
if (data.type === 'progress') {
onProgress?.(data.percent);
} else if (data.type === 'loaded') {
loadedPipelines[modelId] = true;
workerCallbacks.delete(requestId);
resolve(true);
} else if (data.type === 'error') {
workerCallbacks.delete(requestId);
reject(new Error(data.error));
}
});
worker.postMessage({
type: 'load',
modelId,
hfId: modelConfig.hfId,
requestId
});
});
} finally {
downloadingModelId = null;
downloadAbortController = null;
}
}
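// Usage sketch (illustrative):
//   await downloadLocalModel('bge-small-zh', pct => console.log(`model download ${pct}%`));
//   // resolves to true once the worker reports 'loaded'; rejects if the worker posts an error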
export function cancelDownload() {
if (downloadAbortController) {
downloadAbortController.abort();
xbLog.info(MODULE_ID, '下载已取消');
}
downloadingModelId = null;
downloadAbortController = null;
}
/**
* Delete the cache for the given model
*/
export async function deleteLocalModelCache(modelId = null) {
try {
// Delete the IndexedDB cache
await new Promise((resolve, reject) => {
const request = indexedDB.deleteDatabase('transformers-cache');
request.onsuccess = () => resolve();
request.onerror = () => reject(request.error);
request.onblocked = () => {
xbLog.warn(MODULE_ID, 'IndexedDB 删除被阻塞');
resolve();
};
});
// Delete CacheStorage entries
if (window.caches) {
const cacheNames = await window.caches.keys();
for (const name of cacheNames) {
if (name.includes('transformers') || name.includes('huggingface') || name.includes('xenova')) {
await window.caches.delete(name);
}
}
}
// Clear in-memory pipelines
if (modelId && loadedPipelines[modelId]) {
delete loadedPipelines[modelId];
} else {
Object.keys(loadedPipelines).forEach(key => delete loadedPipelines[key]);
}
xbLog.info(MODULE_ID, '模型缓存已清除');
return true;
} catch (e) {
xbLog.error(MODULE_ID, '清除缓存失败', e);
throw e;
}
}
/**
* Generate embeddings with a local model
*/
async function embedLocal(texts, modelId = DEFAULT_LOCAL_MODEL) {
if (!loadedPipelines[modelId]) {
await downloadLocalModel(modelId);
}
return await workerRequest({ type: 'embed', texts });
}
export function isLocalModelLoaded(modelId = DEFAULT_LOCAL_MODEL) {
return !!loadedPipelines[modelId];
}
/**
* Get local model info
*/
export function getLocalModelInfo(modelId = DEFAULT_LOCAL_MODEL) {
return LOCAL_MODELS[modelId] || null;
}
// ═══════════════════════════════════════════════════════════════════════════
// Online service operations
// ═══════════════════════════════════════════════════════════════════════════
/**
* Test the online service connection
*/
export async function testOnlineService(provider, config) {
const { url, key, model } = config;
if (!key) {
throw new Error('请填写 API Key');
}
if (!model) {
throw new Error('请选择模型');
}
const providerConfig = ONLINE_PROVIDERS[provider];
const baseUrl = (providerConfig?.baseUrl || url || '').replace(/\/+$/, '');
if (!baseUrl) {
throw new Error('请填写 API URL');
}
try {
if (provider === 'cohere') {
// Cohere uses a different API format
const response = await fetch(`${baseUrl}/v1/embed`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: model,
texts: ['测试连接'],
input_type: 'search_document',
}),
});
if (!response.ok) {
const error = await response.text();
throw new Error(`API 返回 ${response.status}: ${error}`);
}
const data = await response.json();
const dims = data.embeddings?.[0]?.length || 0;
if (dims === 0) {
throw new Error('API 返回的向量维度为 0');
}
return { success: true, dims };
} else {
// OpenAI-compatible format
const response = await fetch(`${baseUrl}/v1/embeddings`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: model,
input: ['测试连接'],
}),
});
if (!response.ok) {
const error = await response.text();
throw new Error(`API 返回 ${response.status}: ${error}`);
}
const data = await response.json();
const dims = data.data?.[0]?.embedding?.length || 0;
if (dims === 0) {
throw new Error('API 返回的向量维度为 0');
}
return { success: true, dims };
}
} catch (e) {
if (e.name === 'TypeError' && e.message.includes('fetch')) {
throw new Error('网络错误,请检查 URL 是否正确');
}
throw e;
}
}
/**
* Fetch the list of online models (OpenAI-compatible only)
*/
export async function fetchOnlineModels(config) {
const { url, key } = config;
if (!url || !key) {
throw new Error('请填写 URL 和 Key');
}
const baseUrl = url.replace(/\/+$/, '').replace(/\/v1$/, '');
const response = await fetch(`${baseUrl}/v1/models`, {
headers: {
'Authorization': `Bearer ${key}`,
'Accept': 'application/json',
},
});
if (!response.ok) {
throw new Error(`获取模型列表失败: ${response.status}`);
}
const data = await response.json();
const models = data.data?.map(m => m.id).filter(Boolean) || [];
// Filter for embedding-related models
const embeddingModels = models.filter(m => {
const lower = m.toLowerCase();
return lower.includes('embed') ||
lower.includes('bge') ||
lower.includes('e5') ||
lower.includes('gte');
});
return embeddingModels.length > 0 ? embeddingModels : models.slice(0, 20);
}
/**
* Generate embeddings with an online service
*/
async function embedOnline(texts, provider, config) {
const { url, key, model } = config;
const providerConfig = ONLINE_PROVIDERS[provider];
const baseUrl = (providerConfig?.baseUrl || url || '').replace(/\/+$/, '');
const reqId = Math.random().toString(36).slice(2, 6);
const maxRetries = 3;
for (let attempt = 1; attempt <= maxRetries; attempt++) {
const startTime = Date.now();
console.log(`[embed ${reqId}] send ${texts.length} items${attempt > 1 ? ` (retry ${attempt}/${maxRetries})` : ''}`);
try {
let response;
if (provider === 'cohere') {
response = await fetch(`${baseUrl}/v1/embed`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: model,
texts: texts,
input_type: 'search_document',
}),
});
} else {
response = await fetch(`${baseUrl}/v1/embeddings`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: model,
input: texts,
}),
});
}
console.log(`[embed ${reqId}] status=${response.status} time=${Date.now() - startTime}ms`);
if (!response.ok) {
const error = await response.text();
throw new Error(`API 返回 ${response.status}: ${error}`);
}
const data = await response.json();
if (provider === 'cohere') {
console.log(`[embed ${reqId}] done items=${data.embeddings?.length || 0} total=${Date.now() - startTime}ms`);
return data.embeddings.map(e => Array.isArray(e) ? e : Array.from(e));
}
console.log(`[embed ${reqId}] done items=${data.data?.length || 0} total=${Date.now() - startTime}ms`);
return data.data.map(item => {
const embedding = item.embedding;
return Array.isArray(embedding) ? embedding : Array.from(embedding);
});
} catch (e) {
console.warn(`[embed ${reqId}] failed attempt=${attempt} time=${Date.now() - startTime}ms`, e.message);
if (attempt < maxRetries) {
const waitTime = Math.pow(2, attempt - 1) * 1000;
console.log(`[embed ${reqId}] wait ${waitTime}ms then retry`);
await new Promise(r => setTimeout(r, waitTime));
continue;
}
console.error(`[embed ${reqId}] final failure`, e);
throw e;
}
}
}
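// Note: each batch is retried up to 3 attempts with exponential backoff (a 1s wait before the
// second attempt, 2s before the third); the final error is re-thrown to the caller.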
// ═══════════════════════════════════════════════════════════════════════════
// Unified interface
// ═══════════════════════════════════════════════════════════════════════════
/**
* Generate embeddings (unified interface)
* @param {string[]} texts - texts to embed
* @param {Object} config - configuration
* @returns {Promise<number[][]>}
*/
export async function embed(texts, config) {
if (!texts?.length) return [];
const { engine, local, online } = config;
if (engine === 'local') {
const modelId = local?.modelId || DEFAULT_LOCAL_MODEL;
return await embedLocal(texts, modelId);
} else if (engine === 'online') {
const provider = online?.provider || 'siliconflow';
if (!online?.key || !online?.model) {
throw new Error('在线服务配置不完整');
}
return await embedOnline(texts, provider, online);
} else {
throw new Error(`未知的引擎类型: ${engine}`);
}
}
// Concurrent embed for online services (local falls back to sequential)
export async function embedBatchesConcurrent(textBatches, config, concurrency = 3) {
if (config.engine === 'local' || textBatches.length <= 1) {
const results = [];
for (const batch of textBatches) {
results.push(await embed(batch, config));
}
return results;
}
const results = new Array(textBatches.length);
let index = 0;
async function worker() {
while (index < textBatches.length) {
const i = index++;
results[i] = await embed(textBatches[i], config);
}
}
await Promise.all(
Array(Math.min(concurrency, textBatches.length))
.fill(null)
.map(() => worker())
);
return results;
}
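// Usage sketch (illustrative):
//   const batches = [['text a', 'text b'], ['text c']];
//   const perBatch = await embedBatchesConcurrent(batches, config, 3);
//   // -> [[vecA, vecB], [vecC]]  (results keep the order of the input batches)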
/**
* Get a unique fingerprint for the current engine (used to check whether stored vectors still match)
*/
export function getEngineFingerprint(config) {
if (config.engine === 'local') {
const modelId = config.local?.modelId || DEFAULT_LOCAL_MODEL;
const modelConfig = LOCAL_MODELS[modelId];
return `local:${modelId}:${modelConfig?.dims || 512}`;
} else if (config.engine === 'online') {
const provider = config.online?.provider || 'unknown';
const model = config.online?.model || 'unknown';
return `online:${provider}:${model}`;
} else {
return 'unknown';
}
}
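// Example fingerprints (based on the tables above): 'local:bge-small-zh:512', 'online:siliconflow:BAAI/bge-m3'.
// A mismatch against the stored fingerprint invalidates existing vectors (see ensureFingerprintMatch in chunk-store.js).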

View File

@@ -0,0 +1,64 @@
// Embedder worker: runs the local embedding pipeline off the main thread
let pipe = null;
let currentModelId = null;
self.onmessage = async (e) => {
const { type, modelId, hfId, texts, requestId } = e.data || {};
if (type === 'load') {
try {
self.postMessage({ type: 'status', status: 'loading', requestId });
const { pipeline, env } = await import(
'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2'
);
env.allowLocalModels = false;
env.useBrowserCache = false;
pipe = await pipeline('feature-extraction', hfId, {
progress_callback: (progress) => {
if (progress.status === 'progress' && typeof progress.progress === 'number') {
self.postMessage({ type: 'progress', percent: Math.round(progress.progress), requestId });
}
}
});
currentModelId = modelId;
self.postMessage({ type: 'loaded', requestId });
} catch (err) {
self.postMessage({ type: 'error', error: err?.message || String(err), requestId });
}
return;
}
if (type === 'embed') {
if (!pipe) {
self.postMessage({ type: 'error', error: '模型未加载', requestId });
return;
}
try {
const results = [];
for (let i = 0; i < texts.length; i++) {
const output = await pipe(texts[i], { pooling: 'mean', normalize: true });
results.push(Array.from(output.data));
self.postMessage({ type: 'embed_progress', current: i + 1, total: texts.length, requestId });
}
self.postMessage({ type: 'result', vectors: results, requestId });
} catch (err) {
self.postMessage({ type: 'error', error: err?.message || String(err), requestId });
}
return;
}
if (type === 'check') {
self.postMessage({
type: 'status',
loaded: !!pipe,
modelId: currentModelId,
requestId
});
}
};

View File

@@ -0,0 +1,129 @@
// ═══════════════════════════════════════════════════════════════════════════
// Entity Recognition & Relation Graph
// Entity recognition and relation spreading
// ═══════════════════════════════════════════════════════════════════════════
/**
* Match known entities in a piece of text
* @param {string} text - text to match against
* @param {Set<string>} knownEntities - set of known entities
* @returns {string[]} - matched entities
*/
export function matchEntities(text, knownEntities) {
if (!text || !knownEntities?.size) return [];
const matched = new Set();
for (const entity of knownEntities) {
// Exact containment
if (text.includes(entity)) {
matched.add(entity);
continue;
}
// Handle short names: e.g. the entity is "林黛玉" but the text only contains "黛玉"
if (entity.length >= 3) {
const shortName = entity.slice(-2); // take the last two characters
if (text.includes(shortName)) {
matched.add(entity);
}
}
}
return Array.from(matched);
}
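// Example (illustrative): matchEntities('黛玉低声说道', new Set(['林黛玉'])) -> ['林黛玉'],
// because entities of 3+ characters also match on their last two characters as a short name.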
/**
* Collect all known entities from character data and events
*/
export function collectKnownEntities(characters, events) {
const entities = new Set();
// From main characters
(characters?.main || []).forEach(m => {
const name = typeof m === 'string' ? m : m.name;
if (name) entities.add(name);
});
// From relationships
(characters?.relationships || []).forEach(r => {
if (r.from) entities.add(r.from);
if (r.to) entities.add(r.to);
});
// From event participants
(events || []).forEach(e => {
(e.participants || []).forEach(p => {
if (p) entities.add(p);
});
});
return entities;
}
/**
* Build a relationship adjacency list
* @param {Array} relationships - relationship entries
* @returns {Map<string, Array<{target: string, weight: number}>>}
*/
export function buildRelationGraph(relationships) {
const graph = new Map();
const trendWeight = {
'交融': 1.0,
'亲密': 0.9,
'投缘': 0.7,
'陌生': 0.3,
'反感': 0.5,
'厌恶': 0.6,
'破裂': 0.7,
};
for (const rel of relationships || []) {
if (!rel.from || !rel.to) continue;
const weight = trendWeight[rel.trend] || 0.5;
// Bidirectional
if (!graph.has(rel.from)) graph.set(rel.from, []);
if (!graph.has(rel.to)) graph.set(rel.to, []);
graph.get(rel.from).push({ target: rel.to, weight });
graph.get(rel.to).push({ target: rel.from, weight });
}
return graph;
}
/**
* Spreading activation (1 hop)
* @param {string[]} focusEntities - focus entities
* @param {Map} graph - relation graph
* @param {number} decayFactor - decay factor
* @returns {Map<string, number>} - entity -> activation score
*/
export function spreadActivation(focusEntities, graph, decayFactor = 0.5) {
const activation = new Map();
// Focus entities start at 1.0
for (const entity of focusEntities) {
activation.set(entity, 1.0);
}
// 1-hop spread
for (const entity of focusEntities) {
const neighbors = graph.get(entity) || [];
for (const { target, weight } of neighbors) {
const spreadScore = weight * decayFactor;
const existing = activation.get(target) || 0;
// Take the max; do not accumulate
if (spreadScore > existing) {
activation.set(target, spreadScore);
}
}
}
return activation;
}
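// Usage sketch (illustrative):
//   const graph = buildRelationGraph([{ from: '林黛玉', to: '贾宝玉', trend: '亲密' }]);
//   spreadActivation(['林黛玉'], graph);
//   // -> Map { '林黛玉' => 1.0, '贾宝玉' => 0.45 }  (0.9 trend weight x 0.5 decay)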

View File

@@ -0,0 +1,519 @@
// Story Summary - Recall Engine
// L1 chunk + L2 event recall
// - full-corpus vector scoring
// - exponentially decayed, weighted query embedding
// - entity/participant bonuses
// - MMR deduplication
// - floor-sparse deduplication
import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js';
import { embed, getEngineFingerprint } from './embedder.js';
import { xbLog } from '../../../core/debug-core.js';
import { getContext } from '../../../../../../extensions.js';
import { getSummaryStore } from '../data/store.js';
const MODULE_ID = 'recall';
const CONFIG = {
QUERY_MSG_COUNT: 5,
QUERY_DECAY_BETA: 0.7,
QUERY_MAX_CHARS: 600,
QUERY_CONTEXT_CHARS: 240,
CANDIDATE_CHUNKS: 120,
CANDIDATE_EVENTS: 100,
TOP_K_CHUNKS: 40,
TOP_K_EVENTS: 35,
MIN_SIMILARITY: 0.35,
MMR_LAMBDA: 0.72,
BONUS_PARTICIPANT_HIT: 0.08,
BONUS_TEXT_HIT: 0.05,
BONUS_WORLD_TOPIC_HIT: 0.06,
FLOOR_LIMIT: 1,
};
// ═══════════════════════════════════════════════════════════════════════════
// Utility functions
// ═══════════════════════════════════════════════════════════════════════════
function cosineSimilarity(a, b) {
if (!a?.length || !b?.length || a.length !== b.length) return 0;
let dot = 0, nA = 0, nB = 0;
for (let i = 0; i < a.length; i++) {
dot += a[i] * b[i];
nA += a[i] * a[i];
nB += b[i] * b[i];
}
return nA && nB ? dot / (Math.sqrt(nA) * Math.sqrt(nB)) : 0;
}
function normalizeVec(v) {
let s = 0;
for (let i = 0; i < v.length; i++) s += v[i] * v[i];
s = Math.sqrt(s) || 1;
return v.map(x => x / s);
}
function normalize(s) {
return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim();
}
function stripNoise(text) {
return String(text || '')
.replace(/<think>[\s\S]*?<\/think>/gi, '')
.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '')
.replace(/\[tts:[^\]]*\]/gi, '')
.trim();
}
function buildExpDecayWeights(n, beta) {
const last = n - 1;
const w = Array.from({ length: n }, (_, i) => Math.exp(beta * (i - last)));
const sum = w.reduce((a, b) => a + b, 0) || 1;
return w.map(x => x / sum);
}
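// Worked example: buildExpDecayWeights(5, 0.7) ≈ [0.03, 0.06, 0.13, 0.26, 0.52],
// i.e. the most recent query segment contributes roughly half of the weighted query embedding.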
// ═══════════════════════════════════════════════════════════════════════════
// Query construction
// ═══════════════════════════════════════════════════════════════════════════
function buildQuerySegments(chat, count, excludeLastAi) {
if (!chat?.length) return [];
let messages = chat;
if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) {
messages = messages.slice(0, -1);
}
return messages.slice(-count).map((m, idx, arr) => {
const speaker = m.name || (m.is_user ? '用户' : '角色');
const clean = stripNoise(m.mes);
if (!clean) return '';
const limit = idx === arr.length - 1 ? CONFIG.QUERY_MAX_CHARS : CONFIG.QUERY_CONTEXT_CHARS;
return `${speaker}: ${clean.slice(0, limit)}`;
}).filter(Boolean);
}
async function embedWeightedQuery(segments, vectorConfig) {
if (!segments?.length) return null;
const weights = buildExpDecayWeights(segments.length, CONFIG.QUERY_DECAY_BETA);
const vecs = await embed(segments, vectorConfig);
const dims = vecs?.[0]?.length || 0;
if (!dims) return null;
const out = new Array(dims).fill(0);
for (let i = 0; i < vecs.length; i++) {
for (let j = 0; j < dims; j++) out[j] += (vecs[i][j] || 0) * weights[i];
}
return { vector: normalizeVec(out), weights };
}
// ═══════════════════════════════════════════════════════════════════════════
// Entity extraction
// ═══════════════════════════════════════════════════════════════════════════
function buildEntityLexicon(store, allEvents) {
const { name1 } = getContext();
const userName = normalize(name1);
const set = new Set();
for (const e of allEvents || []) {
for (const p of e.participants || []) {
const s = normalize(p);
if (s) set.add(s);
}
}
const json = store?.json || {};
for (const m of json.characters?.main || []) {
const s = normalize(typeof m === 'string' ? m : m?.name);
if (s) set.add(s);
}
for (const a of json.arcs || []) {
const s = normalize(a?.name);
if (s) set.add(s);
}
for (const w of json.world || []) {
const t = normalize(w?.topic);
if (t && !t.includes('::')) set.add(t);
}
for (const r of json.characters?.relationships || []) {
const from = normalize(r?.from);
const to = normalize(r?.to);
if (from) set.add(from);
if (to) set.add(to);
}
const stop = new Set([userName, '我', '你', '他', '她', '它', '用户', '角色', 'assistant'].map(normalize).filter(Boolean));
return Array.from(set)
.filter(s => s.length >= 2 && !stop.has(s) && !/^[\s\p{P}\p{S}]+$/u.test(s) && !/<[^>]+>/.test(s))
.slice(0, 5000);
}
function extractEntities(text, lexicon) {
const t = normalize(text);
if (!t || !lexicon?.length) return [];
const sorted = [...lexicon].sort((a, b) => b.length - a.length);
const hits = [];
for (const e of sorted) {
if (t.includes(e)) hits.push(e);
if (hits.length >= 20) break;
}
return hits;
}
// ═══════════════════════════════════════════════════════════════════════════
// MMR
// ═══════════════════════════════════════════════════════════════════════════
function mmrSelect(candidates, k, lambda, getVector, getScore) {
const selected = [];
const ids = new Set();
while (selected.length < k && candidates.length) {
let best = null, bestScore = -Infinity;
for (const c of candidates) {
if (ids.has(c._id)) continue;
const rel = getScore(c);
let div = 0;
if (selected.length) {
const vC = getVector(c);
if (vC?.length) {
for (const s of selected) {
const sim = cosineSimilarity(vC, getVector(s));
if (sim > div) div = sim;
}
}
}
const score = lambda * rel - (1 - lambda) * div;
if (score > bestScore) {
bestScore = score;
best = c;
}
}
if (!best) break;
selected.push(best);
ids.add(best._id);
}
return selected;
}
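// Each pick maximizes lambda * relevance - (1 - lambda) * (max cosine similarity to the items
// already selected); with MMR_LAMBDA = 0.72 the selection favors relevance while still
// penalizing near-duplicate candidates.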
// ═══════════════════════════════════════════════════════════════════════════
// L1 chunk retrieval
// ═══════════════════════════════════════════════════════════════════════════
async function searchChunks(queryVector, vectorConfig) {
const { chatId } = getContext();
if (!chatId || !queryVector?.length) return [];
const meta = await getMeta(chatId);
const fp = getEngineFingerprint(vectorConfig);
if (meta.fingerprint && meta.fingerprint !== fp) return [];
const chunkVectors = await getAllChunkVectors(chatId);
if (!chunkVectors.length) return [];
const scored = chunkVectors.map(cv => {
const match = String(cv.chunkId).match(/c-(\d+)-(\d+)/);
return {
_id: cv.chunkId,
chunkId: cv.chunkId,
floor: match ? parseInt(match[1], 10) : 0,
chunkIdx: match ? parseInt(match[2], 10) : 0,
similarity: cosineSimilarity(queryVector, cv.vector),
vector: cv.vector,
};
});
const candidates = scored
.filter(s => s.similarity >= CONFIG.MIN_SIMILARITY)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, CONFIG.CANDIDATE_CHUNKS);
const selected = mmrSelect(
candidates,
CONFIG.TOP_K_CHUNKS,
CONFIG.MMR_LAMBDA,
c => c.vector,
c => c.similarity
);
// Floor-sparse deduplication
const floorCount = new Map();
const sparse = [];
for (const s of selected.sort((a, b) => b.similarity - a.similarity)) {
const cnt = floorCount.get(s.floor) || 0;
if (cnt >= CONFIG.FLOOR_LIMIT) continue;
floorCount.set(s.floor, cnt + 1);
sparse.push(s);
}
const floors = [...new Set(sparse.map(c => c.floor))];
const chunks = await getChunksByFloors(chatId, floors);
const chunkMap = new Map(chunks.map(c => [c.chunkId, c]));
return sparse.map(item => {
const chunk = chunkMap.get(item.chunkId);
if (!chunk) return null;
return {
chunkId: item.chunkId,
floor: item.floor,
chunkIdx: item.chunkIdx,
speaker: chunk.speaker,
isUser: chunk.isUser,
text: chunk.text,
similarity: item.similarity,
};
}).filter(Boolean);
}
// ═══════════════════════════════════════════════════════════════════════════
// L2 event retrieval
// ═══════════════════════════════════════════════════════════════════════════
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities) {
const { chatId, name1 } = getContext();
if (!chatId || !queryVector?.length) return [];
const meta = await getMeta(chatId);
const fp = getEngineFingerprint(vectorConfig);
if (meta.fingerprint && meta.fingerprint !== fp) return [];
const eventVectors = await getAllEventVectors(chatId);
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
if (!vectorMap.size) return [];
const userName = normalize(name1);
const querySet = new Set((queryEntities || []).map(normalize));
// Only keep hard-constraint world topics
const worldTopics = (store?.json?.world || [])
.filter(w => ['inventory', 'rule', 'knowledge'].includes(String(w.category).toLowerCase()))
.map(w => normalize(w.topic))
.filter(Boolean);
const scored = (allEvents || []).map((event, idx) => {
const v = vectorMap.get(event.id);
const sim = v ? cosineSimilarity(queryVector, v) : 0;
let bonus = 0;
const reasons = [];
// Participants hit
const participants = (event.participants || []).map(normalize).filter(Boolean);
if (participants.some(p => p !== userName && querySet.has(p))) {
bonus += CONFIG.BONUS_PARTICIPANT_HIT;
reasons.push('participant');
}
// Text hit
const text = normalize(`${event.title || ''} ${event.summary || ''}`);
if ((queryEntities || []).some(e => text.includes(normalize(e)))) {
bonus += CONFIG.BONUS_TEXT_HIT;
reasons.push('text');
}
// World topic hit
if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) {
bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
reasons.push('world');
}
return {
_id: event.id,
_idx: idx,
event,
similarity: sim,
bonus,
finalScore: sim + bonus,
reasons,
isDirect: reasons.includes('participant'),
vector: v,
};
});
const candidates = scored
.filter(s => s.similarity >= CONFIG.MIN_SIMILARITY)
.sort((a, b) => b.finalScore - a.finalScore)
.slice(0, CONFIG.CANDIDATE_EVENTS);
const selected = mmrSelect(
candidates,
CONFIG.TOP_K_EVENTS,
CONFIG.MMR_LAMBDA,
c => c.vector,
c => c.finalScore
);
return selected
.sort((a, b) => b.finalScore - a.finalScore)
.map(s => ({
event: s.event,
similarity: s.finalScore,
_recallType: s.isDirect ? 'DIRECT' : 'SIMILAR',
_recallReason: s.reasons.length ? s.reasons.join('+') : '相似',
}));
}
// ═══════════════════════════════════════════════════════════════════════════
// Logging
// ═══════════════════════════════════════════════════════════════════════════
function formatRecallLog({ elapsed, segments, weights, chunkResults, eventResults, allEvents, queryEntities }) {
const lines = [
'╔══════════════════════════════════════════════════════════════╗',
'║ 记忆召回报告 ║',
'╠══════════════════════════════════════════════════════════════╣',
`║ 耗时: ${elapsed}ms`,
'╚══════════════════════════════════════════════════════════════╝',
'',
'┌─────────────────────────────────────────────────────────────┐',
'│ 【查询构建】最近 5 条消息,指数衰减加权 (β=0.7) │',
'│ 权重越高 = 对召回方向影响越大 │',
'└─────────────────────────────────────────────────────────────┘',
];
// Display sorted by weight, descending
const segmentsSorted = segments.map((s, i) => ({
idx: i + 1,
weight: weights?.[i] ?? 0,
text: s,
})).sort((a, b) => b.weight - a.weight);
segmentsSorted.forEach((s, rank) => {
const bar = '█'.repeat(Math.round(s.weight * 20));
const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text;
const marker = rank === 0 ? ' ◀ 主导' : '';
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
});
lines.push('');
lines.push('┌─────────────────────────────────────────────────────────────┐');
lines.push('│ 【提取实体】用于判断"亲身经历"(DIRECT) │');
lines.push('└─────────────────────────────────────────────────────────────┘');
lines.push(` ${queryEntities?.length ? queryEntities.join('、') : '(无)'}`);
lines.push('');
lines.push('┌─────────────────────────────────────────────────────────────┐');
lines.push(`│ 【L1 原文片段】召回 ${chunkResults.length}`);
lines.push('└─────────────────────────────────────────────────────────────┘');
chunkResults.slice(0, 15).forEach((c, i) => {
const preview = c.text.length > 50 ? c.text.slice(0, 50) + '...' : c.text;
lines.push(` ${String(i + 1).padStart(2)}. #${String(c.floor).padStart(3)} [${c.speaker}] ${preview}`);
lines.push(` 相似度: ${c.similarity.toFixed(3)}`);
});
if (chunkResults.length > 15) {
lines.push(` ... 还有 ${chunkResults.length - 15}`);
}
lines.push('');
lines.push('┌─────────────────────────────────────────────────────────────┐');
lines.push(`│ 【L2 事件记忆】召回 ${eventResults.length} / ${allEvents.length}`);
lines.push('│ DIRECT=亲身经历 SIMILAR=相关背景 │');
lines.push('└─────────────────────────────────────────────────────────────┘');
eventResults.forEach((e, i) => {
const type = e._recallType === 'DIRECT' ? '★ DIRECT ' : ' SIMILAR';
const title = e.event.title || '(无标题)';
lines.push(` ${String(i + 1).padStart(2)}. ${type} ${title}`);
lines.push(` 相似度: ${e.similarity.toFixed(3)} | 原因: ${e._recallReason}`);
});
// Stats
const directCount = eventResults.filter(e => e._recallType === 'DIRECT').length;
const similarCount = eventResults.filter(e => e._recallType === 'SIMILAR').length;
lines.push('');
lines.push('┌─────────────────────────────────────────────────────────────┐');
lines.push('│ 【统计】 │');
lines.push('└─────────────────────────────────────────────────────────────┘');
lines.push(` L1 片段: ${chunkResults.length}`);
lines.push(` L2 事件: ${eventResults.length} 条 (DIRECT: ${directCount}, SIMILAR: ${similarCount})`);
lines.push(` 实体命中: ${queryEntities?.length || 0}`);
lines.push('');
return lines.join('\n');
}
// ═══════════════════════════════════════════════════════════════════════════
// Main entry point
// ═══════════════════════════════════════════════════════════════════════════
export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) {
const T0 = performance.now();
const { chat } = getContext();
const store = getSummaryStore();
if (!allEvents?.length) {
return { events: [], chunks: [], elapsed: 0, logText: 'No events.' };
}
const segments = buildQuerySegments(chat, CONFIG.QUERY_MSG_COUNT, !!options.excludeLastAi);
let queryVector, weights;
try {
const result = await embedWeightedQuery(segments, vectorConfig);
queryVector = result?.vector;
weights = result?.weights;
} catch (e) {
xbLog.error(MODULE_ID, '查询向量生成失败', e);
return { events: [], chunks: [], elapsed: Math.round(performance.now() - T0), logText: 'Query embedding failed.' };
}
if (!queryVector?.length) {
return { events: [], chunks: [], elapsed: Math.round(performance.now() - T0), logText: 'Empty query vector.' };
}
const lexicon = buildEntityLexicon(store, allEvents);
const queryEntities = extractEntities([queryText, ...segments].join('\n'), lexicon);
const [chunkResults, eventResults] = await Promise.all([
searchChunks(queryVector, vectorConfig),
searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities),
]);
const elapsed = Math.round(performance.now() - T0);
const logText = formatRecallLog({ elapsed, queryText, segments, weights, chunkResults, eventResults, allEvents, queryEntities });
console.group('%c[Recall]', 'color: #7c3aed; font-weight: bold');
console.log(`Elapsed: ${elapsed}ms | Entities: ${queryEntities.join(', ') || '(none)'}`);
console.log(`L1: ${chunkResults.length} | L2: ${eventResults.length}/${allEvents.length}`);
console.groupEnd();
return { events: eventResults, chunks: chunkResults, elapsed, logText };
}
export function buildQueryText(chat, count = 2, excludeLastAi = false) {
if (!chat?.length) return '';
let messages = chat;
if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) {
messages = messages.slice(0, -1);
}
return messages.slice(-count).map(m => {
const text = stripNoise(m.mes);
const speaker = m.name || (m.is_user ? '用户' : '角色');
return `${speaker}: ${text.slice(0, 500)}`;
}).filter(Boolean).join('\n');
}

View File

@@ -0,0 +1,93 @@
// ═══════════════════════════════════════════════════════════════════════════
// MiniSearch Index Manager
// Full-text search index management
// ═══════════════════════════════════════════════════════════════════════════
import MiniSearch from '../../../libs/minisearch.mjs';
// Index cache: chatId -> { index, updatedAt }
const indexCache = new Map();
/**
* Get or create the search index
* @param {string} chatId
* @param {Array} events - L2 events
* @param {number} storeUpdatedAt - the store.updatedAt timestamp
* @returns {MiniSearch}
*/
export function getSearchIndex(chatId, events, storeUpdatedAt) {
const cached = indexCache.get(chatId);
// Cache is still valid
if (cached && cached.updatedAt >= storeUpdatedAt) {
return cached.index;
}
// Rebuild the index
const index = new MiniSearch({
fields: ['title', 'summary', 'participants'],
storeFields: ['id'],
searchOptions: {
boost: { title: 2, participants: 1.5, summary: 1 },
fuzzy: 0.2,
prefix: true,
},
});
// Index the events
const docs = events.map(e => ({
id: e.id,
title: e.title || '',
summary: e.summary || '',
participants: (e.participants || []).join(' '),
}));
index.addAll(docs);
// Cache it
indexCache.set(chatId, { index, updatedAt: storeUpdatedAt });
// Cap the cache size
if (indexCache.size > 5) {
const firstKey = indexCache.keys().next().value;
indexCache.delete(firstKey);
}
return index;
}
/**
* Keyword search
* @returns {Map<string, number>} - eventId -> normalized score
*/
export function searchByKeywords(index, queryText, limit = 20) {
if (!queryText?.trim()) return new Map();
const results = index.search(queryText, { limit });
if (results.length === 0) return new Map();
// Normalize scores to 0-1
const maxScore = results[0].score;
const scores = new Map();
for (const r of results) {
scores.set(r.id, r.score / maxScore);
}
return scores;
}
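// Usage sketch (illustrative; the event ids shown are hypothetical):
//   const index = getSearchIndex(chatId, events, store.updatedAt);
//   const scores = searchByKeywords(index, '地图 宝藏');
//   // -> Map { 'evt-12' => 1.0, 'evt-07' => 0.63, ... }  (scores normalized against the top hit)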
/**
* Invalidate the index cache for the given chat
*/
export function invalidateIndex(chatId) {
indexCache.delete(chatId);
}
/**
* Clear all index caches
*/
export function clearAllIndexes() {
indexCache.clear();
}