Update story summary recall and prompt
This commit is contained in:
370
modules/story-summary/vector/pipeline/chunk-builder.js
Normal file
370
modules/story-summary/vector/pipeline/chunk-builder.js
Normal file
@@ -0,0 +1,370 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Story Summary - Chunk Builder
|
||||
// 标准 RAG chunking: ~200 tokens per chunk
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import {
|
||||
getMeta,
|
||||
updateMeta,
|
||||
saveChunks,
|
||||
saveChunkVectors,
|
||||
clearAllChunks,
|
||||
deleteChunksFromFloor,
|
||||
deleteChunksAtFloor,
|
||||
makeChunkId,
|
||||
hashText,
|
||||
CHUNK_MAX_TOKENS,
|
||||
} from '../storage/chunk-store.js';
|
||||
import { embed, getEngineFingerprint } from '../utils/embedder.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import { filterText } from '../utils/text-filter.js';
|
||||
|
||||
// Module tag passed as the first argument to every xbLog call in this file.
const MODULE_ID = 'chunk-builder';
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Token 估算
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Rough token estimate for a piece of text.
 *
 * Every character in U+4E00..U+9FFF counts as one token; every remaining
 * UTF-16 code unit counts as a quarter of a token. The sum is rounded up.
 *
 * @param {string} text - Text to measure; any falsy value yields 0.
 * @returns {number} Estimated token count.
 */
function estimateTokens(text) {
    if (!text) {
        return 0;
    }

    const cjkMatches = text.match(/[\u4e00-\u9fff]/g);
    const cjkCount = cjkMatches === null ? 0 : cjkMatches.length;
    const remainingUnits = text.length - cjkCount;

    return Math.ceil(cjkCount + remainingUnits / 4);
}
|
||||
|
||||
/**
 * Split text into trimmed, non-empty sentences.
 *
 * A split happens immediately after `。`, `!`, `?`, their full-width forms
 * (U+FF01, U+FF1F) or a newline, and after an ASCII `.`, `!` or `?` that is
 * followed by one whitespace character. The full-width `!` / `?` are included
 * so that Chinese exclamations and questions are treated as sentence ends.
 *
 * @param {string} text - Text to split; any falsy value yields [].
 * @returns {string[]} Sentences in original order, each trimmed.
 */
function splitSentences(text) {
    if (!text) return [];
    const parts = text.split(/(?<=[。!?\uFF01\uFF1F\n])|(?<=[.!?]\s)/);
    return parts.map(s => s.trim()).filter(s => s.length > 0);
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Chunk 切分
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Split one chat message into chunks of roughly `maxTokens` estimated tokens.
 *
 * The text is cleaned first: `filterText` is applied, then `[tts:...]` markers
 * and `<state>...</state>` blocks are removed. A message that fits the budget
 * becomes a single chunk. Otherwise sentences are packed greedily into chunks,
 * and a single sentence that exceeds the budget on its own is hard-sliced by
 * character count (`maxTokens * 2` UTF-16 units per slice).
 *
 * @param {number} floor - Floor (message index) the chunks belong to.
 * @param {{mes?: string, name?: string, is_user?: boolean}} message - Chat message.
 * @param {number} [maxTokens=CHUNK_MAX_TOKENS] - Estimated-token budget per chunk.
 * @returns {Array<object>} Chunks in order, each with `chunkId`, `floor`,
 *   `chunkIdx`, `speaker`, `isUser`, `text` and `textHash`; empty when no text
 *   remains after cleaning.
 */
export function chunkMessage(floor, message, maxTokens = CHUNK_MAX_TOKENS) {
    // A missing message is treated like an empty one instead of throwing.
    const text = message?.mes || '';
    const speaker = message?.name || (message?.is_user ? '用户' : '角色');
    const isUser = !!message?.is_user;

    const cleanText = filterText(text)
        .replace(/\[tts:[^\]]*\]/gi, '')
        .replace(/<state>[\s\S]*?<\/state>/gi, '')
        .trim();

    if (!cleanText) return [];

    const chunks = [];

    // Single place where chunk records are built; the index is the position
    // the chunk will take in the result.
    const pushChunk = (chunkText) => {
        const chunkIdx = chunks.length;
        chunks.push({
            chunkId: makeChunkId(floor, chunkIdx),
            floor,
            chunkIdx,
            speaker,
            isUser,
            text: chunkText,
            textHash: hashText(chunkText),
        });
    };

    if (estimateTokens(cleanText) <= maxTokens) {
        pushChunk(cleanText);
        return chunks;
    }

    let pending = [];
    let pendingTokens = 0;

    // Emit the sentences collected so far (if any) as one chunk.
    const flushPending = () => {
        if (pending.length === 0) return;
        pushChunk(pending.join(''));
        pending = [];
        pendingTokens = 0;
    };

    // At least one character per slice, otherwise a zero or negative budget
    // would make the slicing loop below spin forever.
    const sliceSize = Math.max(1, Math.floor(maxTokens * 2) || 1);

    for (const sent of splitSentences(cleanText)) {
        const sentTokens = estimateTokens(sent);

        if (sentTokens > maxTokens) {
            // Oversized sentence: close the current chunk, then hard-slice.
            flushPending();

            let start = 0;
            while (start < sent.length) {
                let end = Math.min(start + sliceSize, sent.length);
                const lastUnit = sent.charCodeAt(end - 1);
                // Never cut between the two halves of a surrogate pair.
                if (end < sent.length && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
                    end += 1;
                }
                pushChunk(sent.slice(start, end));
                start = end;
            }
            continue;
        }

        if (pendingTokens + sentTokens > maxTokens) {
            flushPending();
        }

        pending.push(sent);
        pendingTokens += sentTokens;
    }

    flushPending();

    return chunks;
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 构建状态
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Report how many floors of the current chat already have chunks built.
 *
 * Reads the chat and chat id from `getContext()` and the build watermark
 * (`lastChunkFloor`) from the stored meta. The same four fields are returned
 * on every path; a missing or non-integer watermark is treated as -1
 * (nothing built).
 *
 * @returns {Promise<{totalFloors: number, builtFloors: number,
 *   lastChunkFloor: number, pending: number}>}
 */
export async function getChunkBuildStatus() {
    const { chat, chatId } = getContext();
    if (!chatId) {
        return { totalFloors: 0, builtFloors: 0, lastChunkFloor: -1, pending: 0 };
    }

    const meta = await getMeta(chatId);
    const lastChunkFloor = Number.isInteger(meta?.lastChunkFloor) ? meta.lastChunkFloor : -1;
    const totalFloors = chat?.length || 0;
    const builtFloors = lastChunkFloor + 1;

    return {
        totalFloors,
        builtFloors,
        lastChunkFloor,
        pending: Math.max(0, totalFloors - builtFloors),
    };
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 全量构建
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Rebuild every chunk and chunk vector for the current chat from scratch.
 *
 * Existing chunks are cleared and the meta watermark is reset before the chat
 * is re-chunked. Texts are embedded in batches (5 for the `local` engine,
 * 20 otherwise). A failed batch is logged and counted; its chunks simply get
 * no vector. If `shouldCancel()` is truthy after the loop, the function
 * returns without saving vectors or advancing the watermark.
 *
 * @param {object} [options]
 * @param {(completed: number, total: number) => void} [options.onProgress]
 *   Called after each successfully embedded batch.
 * @param {() => boolean} [options.shouldCancel] - Polled before each batch.
 * @param {object} [options.vectorConfig] - Passed to `embed` and
 *   `getEngineFingerprint`; only `engine` is read here.
 * @returns {Promise<{built: number, errors: number}>} `built` is the number of
 *   saved vectors (or embedded texts when cancelled); `errors` counts failed
 *   batches.
 */
export async function buildAllChunks(options = {}) {
    const { onProgress, shouldCancel, vectorConfig } = options;

    const { chat, chatId } = getContext();
    if (!chatId || !chat?.length) {
        return { built: 0, errors: 0 };
    }

    const fingerprint = getEngineFingerprint(vectorConfig);

    await clearAllChunks(chatId);
    await updateMeta(chatId, { lastChunkFloor: -1, fingerprint });

    const allChunks = [];
    for (let floor = 0; floor < chat.length; floor++) {
        const chunks = chunkMessage(floor, chat[floor]);
        allChunks.push(...chunks);
    }

    if (allChunks.length === 0) {
        // Every floor was processed and produced nothing; record that, the
        // same way buildIncrementalChunks does, so the floors are not
        // reported as pending forever.
        await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });
        return { built: 0, errors: 0 };
    }

    xbLog.info(MODULE_ID, `开始构建 ${allChunks.length} 个 chunks(${chat.length} 层楼)`);

    await saveChunks(chatId, allChunks);

    const texts = allChunks.map(c => c.text);
    // Optional chaining: a missing vectorConfig must not throw after the
    // store has already been cleared.
    const isLocal = vectorConfig?.engine === 'local';
    const batchSize = isLocal ? 5 : 20;

    let completed = 0;
    let errors = 0;
    // Slot i belongs to chunk i. Writing by index (instead of pushing) keeps
    // vectors aligned with chunks no matter how a batch fails.
    const allVectors = new Array(texts.length).fill(null);

    for (let i = 0; i < texts.length; i += batchSize) {
        if (shouldCancel?.()) break;

        const batch = texts.slice(i, i + batchSize);

        try {
            const vectors = await embed(batch, vectorConfig);
            if (vectors?.length !== batch.length) {
                throw new Error(`embed returned ${vectors?.length ?? 0} vectors for ${batch.length} texts`);
            }
            for (let j = 0; j < vectors.length; j++) {
                allVectors[i + j] = vectors[j];
            }
            completed += batch.length;
            onProgress?.(completed, texts.length);
        } catch (e) {
            xbLog.error(MODULE_ID, `批次 ${i}/${texts.length} 向量化失败`, e);
            errors++;
        }
    }

    if (shouldCancel?.()) {
        return { built: completed, errors };
    }

    const vectorItems = [];
    allChunks.forEach((chunk, idx) => {
        const vector = allVectors[idx];
        if (vector) {
            vectorItems.push({ chunkId: chunk.chunkId, vector });
        }
    });

    if (vectorItems.length > 0) {
        await saveChunkVectors(chatId, vectorItems, fingerprint);
    }

    await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });

    xbLog.info(MODULE_ID, `构建完成:${vectorItems.length} 个向量,${errors} 个错误`);

    return { built: vectorItems.length, errors };
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 增量构建
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Chunk and embed only the floors after the stored build watermark.
 *
 * Skips entirely when the stored engine fingerprint differs from the current
 * one. New chunks are saved first, then embedded in one `embed` call; the
 * watermark (`lastChunkFloor`) only advances when the vectors were saved, so
 * a failed embedding is retried on the next call.
 *
 * @param {object} [options]
 * @param {object} [options.vectorConfig] - Passed to `embed` and
 *   `getEngineFingerprint`.
 * @returns {Promise<{built: number}>} Number of vectors saved.
 */
export async function buildIncrementalChunks(options = {}) {
    const { vectorConfig } = options;

    const { chat, chatId } = getContext();
    if (!chatId || !chat?.length) {
        return { built: 0 };
    }

    const meta = await getMeta(chatId);
    const fingerprint = getEngineFingerprint(vectorConfig);

    if (meta?.fingerprint && meta.fingerprint !== fingerprint) {
        xbLog.warn(MODULE_ID, '引擎指纹不匹配,跳过增量构建');
        return { built: 0 };
    }

    // A missing watermark means "nothing built yet". Without this guard the
    // start floor would be NaN, no floor would be chunked, and the watermark
    // would still be moved to the end of the chat.
    const lastChunkFloor = Number.isInteger(meta?.lastChunkFloor) ? meta.lastChunkFloor : -1;
    const startFloor = Math.max(0, lastChunkFloor + 1);
    if (startFloor >= chat.length) {
        return { built: 0 };
    }

    xbLog.info(MODULE_ID, `增量构建 ${startFloor} - ${chat.length - 1} 层`);

    const newChunks = [];
    for (let floor = startFloor; floor < chat.length; floor++) {
        const chunks = chunkMessage(floor, chat[floor]);
        newChunks.push(...chunks);
    }

    if (newChunks.length === 0) {
        await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });
        return { built: 0 };
    }

    await saveChunks(chatId, newChunks);

    const texts = newChunks.map(c => c.text);

    try {
        const vectors = await embed(texts, vectorConfig);
        // Refuse a short result rather than persisting undefined vectors.
        if (vectors?.length !== newChunks.length) {
            throw new Error(`embed returned ${vectors?.length ?? 0} vectors for ${newChunks.length} texts`);
        }
        const vectorItems = newChunks.map((chunk, idx) => ({
            chunkId: chunk.chunkId,
            vector: vectors[idx],
        }));
        await saveChunkVectors(chatId, vectorItems, fingerprint);
        await updateMeta(chatId, { lastChunkFloor: chat.length - 1 });

        return { built: vectorItems.length };
    } catch (e) {
        xbLog.error(MODULE_ID, '增量向量化失败', e);
        return { built: 0 };
    }
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// L1 同步(消息变化时调用)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Sync after messages were deleted: drop every chunk at floor >= newLength.
 *
 * The build watermark (`lastChunkFloor`) is only ever lowered here. If fewer
 * floors had been built than remain in the chat, the stored value is kept so
 * that buildIncrementalChunks still processes the unbuilt floors.
 *
 * @param {string} chatId - Chat whose chunks are updated.
 * @param {number} newLength - Chat length after the deletion.
 * @returns {Promise<void>}
 */
export async function syncOnMessageDeleted(chatId, newLength) {
    if (!chatId || !Number.isInteger(newLength) || newLength < 0) return;

    await deleteChunksFromFloor(chatId, newLength);

    const meta = await getMeta(chatId);
    const previous = Number.isInteger(meta?.lastChunkFloor) ? meta.lastChunkFloor : -1;
    await updateMeta(chatId, { lastChunkFloor: Math.min(previous, newLength - 1) });

    xbLog.info(MODULE_ID, `消息删除同步:删除 floor >= ${newLength}`);
}
|
||||
|
||||
/**
 * Sync after a swipe: drop the chunks of the last floor so it can be rebuilt.
 *
 * The build watermark (`lastChunkFloor`) is only ever lowered here; when
 * earlier floors are still unbuilt the stored value is kept, so they are not
 * silently marked as built.
 *
 * @param {string} chatId - Chat whose chunks are updated.
 * @param {number} lastFloor - Floor of the swiped message.
 * @returns {Promise<void>}
 */
export async function syncOnMessageSwiped(chatId, lastFloor) {
    if (!chatId || !Number.isInteger(lastFloor) || lastFloor < 0) return;

    await deleteChunksAtFloor(chatId, lastFloor);

    const meta = await getMeta(chatId);
    const previous = Number.isInteger(meta?.lastChunkFloor) ? meta.lastChunkFloor : -1;
    await updateMeta(chatId, { lastChunkFloor: Math.min(previous, lastFloor - 1) });

    xbLog.info(MODULE_ID, `swipe 同步:删除 floor ${lastFloor}`);
}
|
||||
|
||||
/**
 * Sync after a new message: delete and rebuild the chunks of the last floor.
 *
 * Does nothing when vectors are disabled, or when the `local` engine is
 * selected but its model is not loaded (to avoid triggering a download or an
 * error). Chunks are saved before embedding; embedding failures are logged
 * and leave the watermark untouched.
 *
 * The watermark (`lastChunkFloor`) advances to `lastFloor` only when every
 * earlier floor is already built. Otherwise it is left alone so that
 * buildIncrementalChunks still covers the gap.
 *
 * @param {string} chatId - Chat whose chunks are updated.
 * @param {number} lastFloor - Floor of the received message.
 * @param {object} message - The chat message, passed to `chunkMessage`.
 * @param {object} vectorConfig - Reads `enabled`, `engine`, `local.modelId`;
 *   also passed to `embed` and `getEngineFingerprint`.
 * @returns {Promise<void>}
 */
export async function syncOnMessageReceived(chatId, lastFloor, message, vectorConfig) {
    if (!chatId || lastFloor < 0 || !message) return;
    if (!vectorConfig?.enabled) return;

    // Skip when the local model is not loaded.
    if (vectorConfig.engine === "local") {
        const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("../utils/embedder.js");
        const modelId = vectorConfig.local?.modelId || DEFAULT_LOCAL_MODEL;
        if (!isLocalModelLoaded(modelId)) return;
    }

    // Remove whatever was stored for this floor before.
    await deleteChunksAtFloor(chatId, lastFloor);

    // Rebuild.
    const chunks = chunkMessage(lastFloor, message);
    if (chunks.length === 0) return;

    await saveChunks(chatId, chunks);

    // Embed.
    const fingerprint = getEngineFingerprint(vectorConfig);
    const texts = chunks.map(c => c.text);

    try {
        const vectors = await embed(texts, vectorConfig);
        // Refuse a short result rather than persisting undefined vectors.
        if (vectors?.length !== chunks.length) {
            throw new Error(`embed returned ${vectors?.length ?? 0} vectors for ${chunks.length} texts`);
        }
        const items = chunks.map((c, i) => ({ chunkId: c.chunkId, vector: vectors[i] }));
        await saveChunkVectors(chatId, items, fingerprint);

        const meta = await getMeta(chatId);
        const previous = Number.isInteger(meta?.lastChunkFloor) ? meta.lastChunkFloor : -1;
        if (previous === lastFloor - 1) {
            await updateMeta(chatId, { lastChunkFloor: lastFloor });
        }

        xbLog.info(MODULE_ID, `消息同步:重建 floor ${lastFloor},${chunks.length} 个 chunk`);
    } catch (e) {
        xbLog.error(MODULE_ID, `消息同步失败:floor ${lastFloor}`, e);
    }
}
|
||||
153
modules/story-summary/vector/pipeline/state-integration.js
Normal file
153
modules/story-summary/vector/pipeline/state-integration.js
Normal file
@@ -0,0 +1,153 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Story Summary - State Integration (L0)
|
||||
// 事件监听 + 回滚钩子注册
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import {
|
||||
saveStateAtoms,
|
||||
saveStateVectors,
|
||||
deleteStateAtomsFromFloor,
|
||||
deleteStateVectorsFromFloor,
|
||||
getStateAtoms,
|
||||
clearStateVectors,
|
||||
} from '../storage/state-store.js';
|
||||
import { embed, getEngineFingerprint } from '../utils/embedder.js';
|
||||
import { getVectorConfig } from '../../data/config.js';
|
||||
|
||||
// Module tag passed as the first argument to every xbLog call in this file.
const MODULE_ID = 'state-integration';
|
||||
|
||||
// Set to true by initStateIntegration so the listener and hook are registered only once.
let initialized = false;
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 初始化
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * One-time wiring of the L0 state layer.
 *
 * Subscribes `handleStateAtomsGenerated` to the
 * `xiaobaix:variables:stateAtomsGenerated` document event and publishes
 * `handleStateRollback` as `globalThis.LWB_StateRollbackHook`. Calls after
 * the first one do nothing.
 */
export function initStateIntegration() {
    if (!initialized) {
        initialized = true;

        // Event emitted by the variables module when new atoms exist.
        const eventName = 'xiaobaix:variables:stateAtomsGenerated';
        $(document).on(eventName, handleStateAtomsGenerated);

        // Rollback hook looked up on globalThis.
        globalThis.LWB_StateRollbackHook = handleStateRollback;

        xbLog.info(MODULE_ID, 'L0 状态层集成已初始化');
    }
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 事件处理
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Event handler for freshly generated StateAtoms.
 *
 * Keeps only the atoms whose `chatId` equals the current chat, stores them
 * via `saveStateAtoms`, and, when the vector config is enabled, hands them
 * to `vectorizeAtoms`.
 *
 * @param {*} e - Event object (unused).
 * @param {{atoms?: Array<object>}} [data] - Event payload.
 * @returns {Promise<void>}
 */
async function handleStateAtomsGenerated(e, data) {
    const atoms = data?.atoms;
    if (!atoms?.length) {
        return;
    }

    const { chatId } = getContext();
    if (!chatId) {
        return;
    }

    const belongsToCurrentChat = (atom) => atom?.chatId === chatId;
    const matching = atoms.filter(belongsToCurrentChat);
    if (matching.length === 0) {
        xbLog.warn(MODULE_ID, `atoms.chatId 不匹配,期望 ${chatId},跳过`);
        return;
    }

    xbLog.info(MODULE_ID, `收到 ${matching.length} 个 StateAtom`);

    // Step 1: persist the atoms themselves.
    saveStateAtoms(matching);

    // Step 2: embed and store vectors, but only when vectors are enabled.
    const vectorCfg = getVectorConfig();
    if (vectorCfg?.enabled) {
        await vectorizeAtoms(chatId, matching, vectorCfg);
    } else {
        xbLog.info(MODULE_ID, '向量未启用,跳过 L0 向量化');
    }
}
|
||||
|
||||
/**
 * Embed the `semantic` text of each atom and store the resulting vectors.
 *
 * Failures are logged and swallowed on purpose: the vectors can be rebuilt
 * later through the "generate vectors" action. A result whose length does not
 * match the number of atoms is treated as a failure, so no atom is ever
 * stored with an undefined vector.
 *
 * @param {string} chatId - Chat the vectors belong to.
 * @param {Array<{atomId: *, floor: number, semantic: string}>} atoms
 * @param {object} vectorCfg - Passed to `embed` and `getEngineFingerprint`.
 * @returns {Promise<void>}
 */
async function vectorizeAtoms(chatId, atoms, vectorCfg) {
    const texts = atoms.map(a => a.semantic);
    const fingerprint = getEngineFingerprint(vectorCfg);

    try {
        const vectors = await embed(texts, vectorCfg);
        if (vectors?.length !== atoms.length) {
            throw new Error(`embed returned ${vectors?.length ?? 0} vectors for ${atoms.length} atoms`);
        }

        const items = atoms.map((a, i) => ({
            atomId: a.atomId,
            floor: a.floor,
            vector: vectors[i],
        }));

        await saveStateVectors(chatId, items, fingerprint);
        xbLog.info(MODULE_ID, `L0 向量化完成: ${items.length} 个`);
    } catch (e) {
        // Non-blocking by design; see the function comment.
        xbLog.error(MODULE_ID, 'L0 向量化失败', e);
    }
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 回滚钩子
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Rollback hook: discard L0 data from `floor` onwards.
 *
 * Atoms are always removed via `deleteStateAtomsFromFloor`; the stored
 * vectors are removed only when a current chat id is available.
 *
 * @param {number} floor - First floor to roll back (inclusive).
 * @returns {Promise<void>}
 */
async function handleStateRollback(floor) {
    xbLog.info(MODULE_ID, `收到回滚请求: floor >= ${floor}`);

    const context = getContext();
    const currentChatId = context.chatId;

    // Atoms first.
    deleteStateAtomsFromFloor(floor);

    // Vectors need a chat id to address the store.
    if (!currentChatId) {
        return;
    }
    await deleteStateVectorsFromFloor(currentChatId, floor);
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 重建向量(供"生成向量"按钮调用)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Rebuild all L0 vectors (called by the "generate vectors" action).
 *
 * Loads the atoms via `getStateAtoms()`, clears the stored vectors for
 * `chatId`, then embeds the atoms' `semantic` text in batches (5 for the
 * `local` engine, 25 otherwise). A failed batch is logged and skipped. A
 * batch whose result length does not match is treated as failed, so no atom
 * is stored with an undefined vector.
 *
 * @param {string} chatId - Chat whose vectors are rebuilt.
 * @param {object} vectorCfg - Reads `enabled` and `engine`; also passed to
 *   `embed` and `getEngineFingerprint`.
 * @returns {Promise<{built: number}>} Number of vectors saved.
 */
export async function rebuildStateVectors(chatId, vectorCfg) {
    if (!chatId || !vectorCfg?.enabled) return { built: 0 };

    const atoms = getStateAtoms();
    // Checked before the destructive clear below; a missing atom list is
    // handled like an empty one.
    if (!atoms?.length) return { built: 0 };

    xbLog.info(MODULE_ID, `开始重建 L0 向量: ${atoms.length} 个 atom`);

    // Drop the old vectors.
    await clearStateVectors(chatId);

    // Re-embed.
    const fingerprint = getEngineFingerprint(vectorCfg);
    const batchSize = vectorCfg.engine === 'local' ? 5 : 25;
    let built = 0;

    for (let i = 0; i < atoms.length; i += batchSize) {
        const batch = atoms.slice(i, i + batchSize);
        const texts = batch.map(a => a.semantic);

        try {
            const vectors = await embed(texts, vectorCfg);
            if (vectors?.length !== batch.length) {
                throw new Error(`embed returned ${vectors?.length ?? 0} vectors for ${batch.length} atoms`);
            }

            const items = batch.map((a, j) => ({
                atomId: a.atomId,
                floor: a.floor,
                vector: vectors[j],
            }));

            await saveStateVectors(chatId, items, fingerprint);
            built += items.length;
        } catch (e) {
            xbLog.error(MODULE_ID, `L0 向量化批次失败: ${i}-${i + batchSize}`, e);
        }
    }

    xbLog.info(MODULE_ID, `L0 向量重建完成: ${built}/${atoms.length}`);
    return { built };
}
|
||||
160
modules/story-summary/vector/pipeline/state-recall.js
Normal file
160
modules/story-summary/vector/pipeline/state-recall.js
Normal file
@@ -0,0 +1,160 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Story Summary - State Recall (L0)
|
||||
// L0 语义锚点召回 + floor bonus + 虚拟 chunk 转换
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js';
|
||||
import { getMeta } from '../storage/chunk-store.js';
|
||||
import { getEngineFingerprint } from '../utils/embedder.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
|
||||
// Module tag passed as the first argument to the xbLog call in this file.
const MODULE_ID = 'state-recall';
|
||||
|
||||
// Tuning knobs for L0 recall, read by searchStateAtoms.
const CONFIG = {
    // Upper bound on the number of atoms returned by one search.
    MAX_RESULTS: 20,
    // Hits with a cosine similarity below this value are discarded.
    MIN_SIMILARITY: 0.55,
};
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 工具函数
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Cosine similarity of two equally long numeric vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} The similarity, or 0 when either vector is missing or
 *   empty, the lengths differ, or either squared norm is falsy.
 */
function cosineSimilarity(a, b) {
    const sizeA = a?.length;
    const sizeB = b?.length;
    if (!sizeA || !sizeB || sizeA !== sizeB) {
        return 0;
    }

    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let idx = 0; idx < sizeA; idx += 1) {
        const x = a[idx];
        const y = b[idx];
        dot += x * y;
        normA += x * x;
        normB += y * y;
    }

    if (!normA || !normB) {
        return 0;
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// L0 向量检索
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Find the StateAtoms of the current chat that are most similar to a query
 * vector.
 *
 * Returns nothing when there is no current chat, the query vector is empty,
 * or the stored engine fingerprint differs from the current one. Stored
 * vectors without a matching atom are ignored. Hits below
 * `CONFIG.MIN_SIMILARITY` are dropped; the rest are sorted by descending
 * similarity and capped at `CONFIG.MAX_RESULTS`.
 *
 * @param {ArrayLike<number>} queryVector - Embedded query.
 * @param {object} vectorConfig - Passed to `getEngineFingerprint`.
 * @returns {Promise<Array<{atomId: *, floor: number, similarity: number, atom: object}>>}
 */
export async function searchStateAtoms(queryVector, vectorConfig) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length) {
        return [];
    }

    // Vectors from a different engine are not comparable with the query.
    const meta = await getMeta(chatId);
    const currentFingerprint = getEngineFingerprint(vectorConfig);
    if (meta.fingerprint && meta.fingerprint !== currentFingerprint) {
        xbLog.warn(MODULE_ID, 'fingerprint 不匹配,跳过 L0 召回');
        return [];
    }

    const stateVectors = await getAllStateVectors(chatId);
    if (!stateVectors.length) {
        return [];
    }

    // Index atoms by id so each vector can be joined with its atom.
    const atomsById = new Map();
    for (const atom of getStateAtoms()) {
        atomsById.set(atom.atomId, atom);
    }

    const hits = [];
    stateVectors.forEach((sv) => {
        const atom = atomsById.get(sv.atomId);
        if (!atom) {
            return;
        }
        const similarity = cosineSimilarity(queryVector, sv.vector);
        if (similarity >= CONFIG.MIN_SIMILARITY) {
            hits.push({ atomId: sv.atomId, floor: sv.floor, similarity, atom });
        }
    });

    hits.sort((left, right) => right.similarity - left.similarity);
    return hits.slice(0, CONFIG.MAX_RESULTS);
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Floor Bonus 构建
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Build a floor -> bonus map for the floors that appear in L0 results.
 *
 * Every distinct floor receives the same flat `bonusFactor`, regardless of
 * how many results hit it or how similar they were.
 *
 * @param {Array<{floor: number}>} l0Results - L0 search hits.
 * @param {number} [bonusFactor=0.10] - Bonus assigned to each floor.
 * @returns {Map<number, number>} Floors in order of first appearance.
 */
export function buildL0FloorBonus(l0Results, bonusFactor = 0.10) {
    // A Set keeps first-appearance order and drops repeated floors.
    const distinctFloors = new Set(Array.from(l0Results || [], (hit) => hit.floor));
    const entries = Array.from(distinctFloors, (floor) => [floor, bonusFactor]);
    return new Map(entries);
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 虚拟 Chunk 转换
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Wrap L0 search hits in the chunk shape so they can be processed together
 * with L1 chunks.
 *
 * @param {Array<{atomId: *, floor: number, similarity: number, atom: {semantic: string}}>} l0Results
 * @returns {Array<object>} One virtual chunk per hit, in the same order.
 */
export function stateToVirtualChunks(l0Results) {
    const toVirtualChunk = ({ atomId, floor, similarity, atom }) => {
        const chunkId = `state-${atomId}`;
        return {
            chunkId,
            floor,
            // Negative index so these sort ahead of L1 chunks.
            chunkIdx: -1,
            // Fixed marker used as the speaker of every L0 entry.
            speaker: '📌',
            isUser: false,
            text: atom.semantic,
            textHash: null,
            similarity,
            // Flag that distinguishes virtual chunks from real ones.
            isL0: true,
            // Original atom kept for later use.
            _atom: atom,
        };
    };

    const hits = l0Results || [];
    return hits.map((hit) => toVirtualChunk(hit));
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 每楼层稀疏去重
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Merge L0 and L1 chunks and keep at most `limit` chunks per floor.
 *
 * The combined list is ranked by descending similarity, the best `limit`
 * chunks of each floor are kept, and the survivors are ranked again.
 *
 * @param {Array} l0Chunks - Virtual chunks.
 * @param {Array} l1Chunks - Real chunks.
 * @param {number} [limit=2] - Maximum number of chunks kept per floor.
 * @returns {Array} Surviving chunks, highest similarity first.
 */
export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
    const bySimilarityDesc = (left, right) => right.similarity - left.similarity;

    // Rank everything together first so each floor keeps its best entries.
    const ranked = [...(l0Chunks || []), ...(l1Chunks || [])];
    ranked.sort(bySimilarityDesc);

    // One bucket per floor, created in order of first appearance.
    const buckets = new Map();
    ranked.forEach((chunk) => {
        let bucket = buckets.get(chunk.floor);
        if (bucket === undefined) {
            bucket = [];
            buckets.set(chunk.floor, bucket);
        }
        if (bucket.length < limit) {
            bucket.push(chunk);
        }
    });

    // Flatten floor by floor, then restore the global similarity order.
    const survivors = [];
    for (const bucket of buckets.values()) {
        survivors.push(...bucket);
    }
    return survivors.sort(bySimilarityDesc);
}
|
||||
Reference in New Issue
Block a user