Add vector IO and text filtering

This commit is contained in:
2026-01-29 17:02:51 +08:00
parent fc23781e17
commit ee5f02fff9
10 changed files with 3368 additions and 42 deletions

2665
libs/fflate.mjs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -15,6 +15,11 @@ export function getSettings() {
return ext; return ext;
} }
// Built-in text filter rules, each a { start, end } marker pair.
// They are applied whenever the stored vector config carries no `textFilterRules`.
const DEFAULT_FILTER_RULES = [
{ start: '<think>', end: '</think>' },
{ start: '<thinking>', end: '</thinking>' },
];
export function getSummaryPanelConfig() { export function getSummaryPanelConfig() {
const defaults = { const defaults = {
api: { provider: 'st', url: '', key: '', model: '', modelCache: [] }, api: { provider: 'st', url: '', key: '', model: '', modelCache: [] },
@@ -29,6 +34,7 @@ export function getSummaryPanelConfig() {
wrapperTail: '', wrapperTail: '',
forceInsertAtEnd: false, forceInsertAtEnd: false,
}, },
vector: null,
}; };
try { try {
const raw = localStorage.getItem('summary_panel_config'); const raw = localStorage.getItem('summary_panel_config');
@@ -64,12 +70,21 @@ export function getVectorConfig() {
const raw = localStorage.getItem('summary_panel_config'); const raw = localStorage.getItem('summary_panel_config');
if (!raw) return null; if (!raw) return null;
const parsed = JSON.parse(raw); const parsed = JSON.parse(raw);
return parsed.vector || null; const cfg = parsed.vector || null;
if (cfg && !cfg.textFilterRules) {
cfg.textFilterRules = [...DEFAULT_FILTER_RULES];
}
return cfg;
} catch { } catch {
return null; return null;
} }
} }
/**
 * Resolve the text filter rules that are currently in effect.
 *
 * @returns {Array<{start: string, end: string}>} The rules found on the vector
 *   config, or the built-in defaults when there is no config or it has no rules.
 */
export function getTextFilterRules() {
    const storedRules = getVectorConfig()?.textFilterRules;
    if (storedRules) {
        return storedRules;
    }
    return DEFAULT_FILTER_RULES;
}
export function saveVectorConfig(vectorCfg) { export function saveVectorConfig(vectorCfg) {
try { try {
const raw = localStorage.getItem('summary_panel_config') || '{}'; const raw = localStorage.getItem('summary_panel_config') || '{}';

View File

@@ -163,7 +163,6 @@ function formatCausalEventLine(causalItem, causalById) {
return lines.join("\n"); return lines.join("\n");
} }
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// 装配日志(开发调试用) // 装配日志(开发调试用)
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@@ -251,6 +250,19 @@ function formatInjectionLog(stats, details, recentOrphanStats = null) {
return lines.join("\n"); return lines.join("\n");
} }
/**
 * Rewrite the ordinal prefix at the very start of an event text.
 *
 * Handles both the "{idx}. " and the "{idx}.【...】" prefix forms. Leading
 * whitespace and the separator that follows the number are preserved. Text
 * that does not start with such a prefix is returned unchanged.
 *
 * @param {*} text - Event text; falsy values are treated as an empty string.
 * @param {number|string} newIndex - Ordinal written in place of the old one.
 * @returns {string} The text with its leading ordinal replaced.
 */
function renumberEventText(text, newIndex) {
    const source = String(text || "");
    // A replacer function is used instead of a "$1<n>$2" template string:
    // gluing the digits of newIndex directly after "$1" only works through the
    // two-digit group-reference fallback, and any "$" inside newIndex would be
    // interpreted as a replacement pattern. Here the index is inserted literally.
    return source.replace(
        /^(\s*)\d+(\.\s*(?:【)?)/,
        (_match, leading, separator) => `${leading}${newIndex}${separator}`
    );
}
/**
 * Compute the numeric key used to order events.
 *
 * Preference order:
 *   1. `start` of the range that `parseFloorRange` returns for the event summary;
 *   2. the number embedded in an id of the form "evt-<n>";
 *   3. `Number.MAX_SAFE_INTEGER`, so events with neither sort last.
 *
 * @param {{summary?: string, id?: string}|null|undefined} ev - Event record.
 * @returns {number} Sort key (ascending order).
 */
function getEventSortKey(ev) {
    const floorRange = parseFloorRange(ev?.summary);
    if (floorRange) {
        // The floor where the event occurs is the preferred ordering signal.
        return floorRange.start;
    }

    const idMatch = /evt-(\d+)/.exec(String(ev?.id || ""));
    if (idMatch === null) {
        return Number.MAX_SAFE_INTEGER;
    }
    return parseInt(idMatch[1], 10);
}
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// 非向量模式:全量总结注入(世界 + 事件 + 弧光) // 非向量模式:全量总结注入(世界 + 事件 + 弧光)
@@ -451,29 +463,29 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
// 候选按相似度从高到低(保证高分优先拥有证据) // 候选按相似度从高到低(保证高分优先拥有证据)
const candidates = [...recalledEvents].sort((a, b) => (b.similarity || 0) - (a.similarity || 0)); const candidates = [...recalledEvents].sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
let idxDirect = 1; const selectedDirect = []; // { event, text, tokens, chunk, hasEvidence }
let idxSimilar = 1; const selectedSimilar = []; // { event, text, tokens, chunk, hasEvidence }
const selectedDirectTexts = [];
const selectedSimilarTexts = [];
for (const e of candidates) { for (const e of candidates) {
if (total.used >= total.max) break; if (total.used >= total.max) break;
const isDirect = e._recallType === "DIRECT"; const isDirect = e._recallType === "DIRECT";
const idx = isDirect ? idxDirect : idxSimilar;
const bestChunk = pickBestChunkForEvent(e.event); const bestChunk = pickBestChunkForEvent(e.event);
// 先尝试“带证据” // 先尝试“带证据”
let text = formatEventWithEvidence(e, idx, bestChunk); // idx 先占位写 0后面统一按时间线重排后再改号
let text = formatEventWithEvidence(e, 0, bestChunk);
let cost = estimateTokens(text); let cost = estimateTokens(text);
let hasEvidence = !!bestChunk; let hasEvidence = !!bestChunk;
let chosenChunk = bestChunk || null;
// 塞不下就退化成“不带证据” // 塞不下就退化成“不带证据”
if (total.used + cost > total.max) { if (total.used + cost > total.max) {
text = formatEventWithEvidence(e, idx, null); text = formatEventWithEvidence(e, 0, null);
cost = estimateTokens(text); cost = estimateTokens(text);
hasEvidence = false; hasEvidence = false;
chosenChunk = null;
if (total.used + cost > total.max) { if (total.used + cost > total.max) {
continue; continue;
@@ -482,11 +494,9 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
// 写入 // 写入
if (isDirect) { if (isDirect) {
selectedDirectTexts.push(text); selectedDirect.push({ event: e.event, text, tokens: cost, chunk: chosenChunk, hasEvidence });
idxDirect++;
} else { } else {
selectedSimilarTexts.push(text); selectedSimilar.push({ event: e.event, text, tokens: cost, chunk: chosenChunk, hasEvidence });
idxSimilar++;
} }
injectionStats.events.selected++; injectionStats.events.selected++;
@@ -515,8 +525,19 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
}); });
} }
details.directCount = selectedDirectTexts.length; // ═══════════════════════════════════════════════════════════════════
details.similarCount = selectedSimilarTexts.length; // 重排:恢复时间线顺序(按楼层/evt 序号升序)
// 并统一重编号(不重新 pick chunk不重新格式化结构
// ═══════════════════════════════════════════════════════════════════
selectedDirect.sort((a, b) => getEventSortKey(a.event) - getEventSortKey(b.event));
selectedSimilar.sort((a, b) => getEventSortKey(a.event) - getEventSortKey(b.event));
const selectedDirectTexts = selectedDirect.map((it, i) => renumberEventText(it.text, i + 1));
const selectedSimilarTexts = selectedSimilar.map((it, i) => renumberEventText(it.text, i + 1));
details.directCount = selectedDirect.length;
details.similarCount = selectedSimilar.length;
assembled.events.direct = selectedDirectTexts; assembled.events.direct = selectedDirectTexts;
assembled.events.similar = selectedSimilarTexts; assembled.events.similar = selectedSimilarTexts;

View File

@@ -102,6 +102,11 @@
} }
}; };
// Filter rules shown in the settings UI when the loaded vector config
// has no `textFilterRules` of its own.
const DEFAULT_FILTER_RULES = [
{ start: '<think>', end: '</think>' },
{ start: '<thinking>', end: '</thinking>' },
];
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// State // State
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
@@ -199,7 +204,7 @@
if (opt.value) modelCache.push(opt.value); if (opt.value) modelCache.push(opt.value);
} }
} }
return { const result = {
enabled: safeVal('vector-enabled', false), enabled: safeVal('vector-enabled', false),
engine: safeRadio('vector-engine', 'online'), engine: safeRadio('vector-engine', 'online'),
local: { modelId: safeVal('local-model-select', 'bge-small-zh') }, local: { modelId: safeVal('local-model-select', 'bge-small-zh') },
@@ -211,6 +216,10 @@
modelCache modelCache
} }
}; };
// 收集过滤规则
result.textFilterRules = collectFilterRules();
return result;
} }
function loadVectorConfig(cfg) { function loadVectorConfig(cfg) {
@@ -240,6 +249,9 @@
} }
if (cfg.online.model) $('vector-model-select').value = cfg.online.model; if (cfg.online.model) $('vector-model-select').value = cfg.online.model;
} }
// 加载过滤规则
renderFilterRules(cfg?.textFilterRules || DEFAULT_FILTER_RULES);
} }
function updateLocalModelDesc(modelId) { function updateLocalModelDesc(modelId) {
@@ -278,6 +290,67 @@
if (guideBtn) guideBtn.onclick = e => { e.preventDefault(); openHfGuide(); }; if (guideBtn) guideBtn.onclick = e => { e.preventDefault(); openHfGuide(); };
} }
// ═══════════════════════════════════════════════════════════════════════════
// Filter Rules UI
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Render one editable row per filter rule into the element returned by
 * `$('filter-rules-list')`, replacing whatever that element contained.
 *
 * Each row has a start input, an end input and a delete button. Rule values
 * are passed through `h(...)` before being placed in the `value` attribute
 * (presumably an HTML-escaping helper defined elsewhere - confirm).
 *
 * @param {Array<{start?: string, end?: string}>|null|undefined} rules - Rules to
 *   show; a missing or empty list renders no rows.
 */
function renderFilterRules(rules) {
const list = $('filter-rules-list');
// Nothing to do when the container is not present
if (!list) return;
const items = rules?.length ? rules : [];
setHtml(list, items.map((r, i) => `
<div class="filter-rule-item" data-idx="${i}" style="display:flex;gap:6px;align-items:center">
<input type="text" class="filter-rule-start" placeholder="起始(可空)" value="${h(r.start || '')}" style="flex:1;padding:6px 8px;font-size:.8125rem">
<span style="color:var(--txt3)">→</span>
<input type="text" class="filter-rule-end" placeholder="结束(可空)" value="${h(r.end || '')}" style="flex:1;padding:6px 8px;font-size:.8125rem">
<button class="btn btn-sm btn-del filter-rule-del" style="padding:4px 8px">✕</button>
</div>
`).join(''));
// Bind delete: each button removes its own enclosing row from the DOM
list.querySelectorAll('.filter-rule-del').forEach(btn => {
btn.onclick = () => {
btn.closest('.filter-rule-item')?.remove();
};
});
}
/**
 * Read the filter rules currently entered in the settings UI.
 *
 * Every `.filter-rule-item` row inside the `filter-rules-list` element
 * contributes one `{ start, end }` pair built from its trimmed input values.
 * Rows where both values are empty are dropped.
 *
 * @returns {Array<{start: string, end: string}>} Rules in row order; an empty
 *   array when the list element is missing.
 */
function collectFilterRules() {
    const container = $('filter-rules-list');
    if (!container) {
        return [];
    }

    // Missing inputs and blank values both collapse to an empty string.
    const readField = (row, selector) => row.querySelector(selector)?.value?.trim() || '';

    return Array.from(container.querySelectorAll('.filter-rule-item'))
        .map((row) => ({
            start: readField(row, '.filter-rule-start'),
            end: readField(row, '.filter-rule-end'),
        }))
        .filter((rule) => rule.start || rule.end);
}
/**
 * Append one empty filter-rule row (start input, end input, delete button)
 * to the element returned by `$('filter-rules-list')`.
 *
 * The new row's `data-idx` is the number of rows present before the insert.
 */
function addFilterRule() {
const list = $('filter-rules-list');
// Nothing to do when the container is not present
if (!list) return;
const idx = list.querySelectorAll('.filter-rule-item').length;
const div = document.createElement('div');
div.className = 'filter-rule-item';
div.dataset.idx = idx;
div.style.cssText = 'display:flex;gap:6px;align-items:center';
setHtml(div, `
<input type="text" class="filter-rule-start" placeholder="起始(可空)" value="" style="flex:1;padding:6px 8px;font-size:.8125rem">
<span style="color:var(--txt3)">→</span>
<input type="text" class="filter-rule-end" placeholder="结束(可空)" value="" style="flex:1;padding:6px 8px;font-size:.8125rem">
<button class="btn btn-sm btn-del filter-rule-del" style="padding:4px 8px">✕</button>
`);
// The delete button removes this row from the DOM
div.querySelector('.filter-rule-del').onclick = () => div.remove();
list.appendChild(div);
}
function updateLocalModelStatus(status, message) { function updateLocalModelStatus(status, message) {
const dot = $('local-model-status').querySelector('.status-dot'); const dot = $('local-model-status').querySelector('.status-dot');
const text = $('local-model-status').querySelector('.status-text'); const text = $('local-model-status').querySelector('.status-text');
@@ -395,6 +468,10 @@
config: { url: $('vector-api-url').value.trim(), key: $('vector-api-key').value.trim(), model: $('vector-model-select').value.trim() } config: { url: $('vector-api-url').value.trim(), key: $('vector-api-key').value.trim(), model: $('vector-model-select').value.trim() }
}); });
}; };
// 过滤规则:添加按钮
$('btn-add-filter-rule').onclick = addFilterRule;
$('btn-gen-vectors').onclick = () => { $('btn-gen-vectors').onclick = () => {
if (vectorGenerating) return; if (vectorGenerating) return;
postMsg('VECTOR_GENERATE', { config: getVectorConfig() }); postMsg('VECTOR_GENERATE', { config: getVectorConfig() });
@@ -403,6 +480,20 @@
if (confirm('确定清除当前聊天的向量数据?')) postMsg('VECTOR_CLEAR'); if (confirm('确定清除当前聊天的向量数据?')) postMsg('VECTOR_CLEAR');
}; };
$('btn-cancel-vectors').onclick = () => postMsg('VECTOR_CANCEL_GENERATE'); $('btn-cancel-vectors').onclick = () => postMsg('VECTOR_CANCEL_GENERATE');
// 导入导出
$('btn-export-vectors').onclick = () => {
$('btn-export-vectors').disabled = true;
$('vector-io-status').textContent = '导出中...';
postMsg('VECTOR_EXPORT');
};
$('btn-import-vectors').onclick = () => {
// 让 parent 处理文件选择,避免 iframe 传大文件
$('btn-import-vectors').disabled = true;
$('vector-io-status').textContent = '导入中...';
postMsg('VECTOR_IMPORT_PICK');
};
} }
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// Settings Modal // Settings Modal
@@ -1524,6 +1615,30 @@ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "
updateVectorGenProgress(d.phase, d.current, d.total); updateVectorGenProgress(d.phase, d.current, d.total);
break; break;
case 'VECTOR_EXPORT_RESULT':
$('btn-export-vectors').disabled = false;
if (d.success) {
$('vector-io-status').textContent = `导出成功: ${d.filename} (${(d.size / 1024 / 1024).toFixed(2)}MB)`;
} else {
$('vector-io-status').textContent = '导出失败: ' + (d.error || '未知错误');
}
break;
case 'VECTOR_IMPORT_RESULT':
$('btn-import-vectors').disabled = false;
if (d.success) {
let msg = `导入成功: ${d.chunkCount} 片段, ${d.eventCount} 事件`;
if (d.warnings?.length) {
msg += '\n⚠ ' + d.warnings.join('\n⚠ ');
}
$('vector-io-status').textContent = msg;
// 刷新统计
postMsg('REQUEST_VECTOR_STATS');
} else {
$('vector-io-status').textContent = '导入失败: ' + (d.error || '未知错误');
}
break;
case 'RECALL_LOG': case 'RECALL_LOG':
setRecallLog(d.text || ''); setRecallLog(d.text || '');
break; break;

View File

@@ -393,6 +393,18 @@
</div> </div>
</div> </div>
<!-- 文本过滤规则 -->
<div class="settings-row" style="margin-top:16px">
<div class="settings-field full">
<label>文本过滤规则</label>
<p class="settings-hint" style="margin-bottom:8px">
遇到「起始」后跳过,直到「结束」。起始或结束可单独留空。用于过滤思考标签等干扰内容。
</p>
<div id="filter-rules-list" style="display:flex;flex-direction:column;gap:6px"></div>
<button class="btn btn-sm" id="btn-add-filter-rule" style="margin-top:8px"> 添加规则</button>
</div>
</div>
<!-- Vector Stats --> <!-- Vector Stats -->
<div class="vector-chat-section"> <div class="vector-chat-section">
<div class="settings-row"> <div class="settings-row">
@@ -428,6 +440,20 @@
<button class="btn btn-sm hidden" id="btn-cancel-vectors">取消</button> <button class="btn btn-sm hidden" id="btn-cancel-vectors">取消</button>
</div> </div>
<div class="settings-hint" style="margin-top:8px">首次生成向量可能耗时较久,页面短暂卡顿属正常。若本地模型重进酒馆后需重下。</div> <div class="settings-hint" style="margin-top:8px">首次生成向量可能耗时较久,页面短暂卡顿属正常。若本地模型重进酒馆后需重下。</div>
<!-- 向量导入导出 -->
<div class="vector-io-section" style="border-top:1px solid var(--bdr);padding-top:16px;margin-top:16px">
<div class="settings-row">
<div class="settings-field full">
<label>向量迁移(跨设备 / 防清缓存)</label>
<div class="settings-btn-row" id="vector-io-row" style="margin-top:8px">
<button class="btn btn-sm" id="btn-export-vectors">导出向量</button>
<button class="btn btn-sm" id="btn-import-vectors">导入向量</button>
</div>
<div class="settings-hint" id="vector-io-status"></div>
</div>
</div>
</div>
</div> </div>
</div> </div>
</div> </div>

View File

@@ -58,14 +58,12 @@ import {
import { import {
getMeta, getMeta,
updateMeta, updateMeta,
getAllEventVectors,
saveEventVectors as saveEventVectorsToDb, saveEventVectors as saveEventVectorsToDb,
clearEventVectors, clearEventVectors,
clearAllChunks, clearAllChunks,
saveChunks, saveChunks,
saveChunkVectors, saveChunkVectors,
getStorageStats, getStorageStats,
ensureFingerprintMatch,
} from "./vector/chunk-store.js"; } from "./vector/chunk-store.js";
import { import {
@@ -77,6 +75,9 @@ import {
syncOnMessageReceived, syncOnMessageReceived,
} from "./vector/chunk-builder.js"; } from "./vector/chunk-builder.js";
// vector io
import { exportVectors, importVectors } from "./vector/vector-io.js";
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// 常量 // 常量
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
@@ -110,6 +111,10 @@ const HIDE_APPLY_DEBOUNCE_MS = 250;
const sleep = (ms) => new Promise((r) => setTimeout(r, ms)); const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
// 向量提醒节流
let lastVectorWarningAt = 0;
const VECTOR_WARNING_COOLDOWN_MS = 120000; // 2分钟内不重复提醒
const EXT_PROMPT_KEY = "LittleWhiteBox_StorySummary"; const EXT_PROMPT_KEY = "LittleWhiteBox_StorySummary";
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
@@ -334,7 +339,7 @@ async function handleGenerateVectors(vectorCfg) {
const fingerprint = getEngineFingerprint(vectorCfg); const fingerprint = getEngineFingerprint(vectorCfg);
const isLocal = vectorCfg.engine === "local"; const isLocal = vectorCfg.engine === "local";
const batchSize = isLocal ? 5 : 20; const batchSize = isLocal ? 5 : 25;
const concurrency = isLocal ? 1 : 2; const concurrency = isLocal ? 1 : 2;
await clearAllChunks(chatId); await clearAllChunks(chatId);
@@ -363,12 +368,10 @@ async function handleGenerateVectors(vectorCfg) {
const store = getSummaryStore(); const store = getSummaryStore();
const events = store?.json?.events || []; const events = store?.json?.events || [];
await ensureFingerprintMatch(chatId, fingerprint); // L2: 全量重建(先清空再重建,保持与 L1 一致性)
const existingVectors = await getAllEventVectors(chatId); await clearEventVectors(chatId);
const existingIds = new Set(existingVectors.map((v) => v.eventId));
const l2Pairs = events const l2Pairs = events
.filter((e) => !existingIds.has(e.id))
.map((e) => ({ id: e.id, text: `${e.title || ""} ${e.summary || ""}`.trim() })) .map((e) => ({ id: e.id, text: `${e.title || ""} ${e.summary || ""}`.trim() }))
.filter((p) => p.text); .filter((p) => p.text);
@@ -386,7 +389,7 @@ async function handleGenerateVectors(vectorCfg) {
const l1Total = allChunks.length; const l1Total = allChunks.length;
const l2Total = events.length; const l2Total = events.length;
let l1Completed = 0; let l1Completed = 0;
let l2Completed = existingIds.size; let l2Completed = 0;
postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L1", current: 0, total: l1Total }); postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L1", current: 0, total: l1Total });
postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L2", current: l2Completed, total: l2Total }); postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L2", current: l2Completed, total: l2Total });
@@ -482,6 +485,9 @@ async function handleGenerateVectors(vectorCfg) {
await saveEventVectorsToDb(chatId, l2VectorItems, fingerprint); await saveEventVectorsToDb(chatId, l2VectorItems, fingerprint);
} }
// 更新 fingerprint无论之前是否匹配
await updateMeta(chatId, { fingerprint });
postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L1", current: -1, total: 0 }); postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L1", current: -1, total: 0 });
postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L2", current: -1, total: 0 }); postToFrame({ type: "VECTOR_GEN_PROGRESS", phase: "L2", current: -1, total: 0 });
await sendVectorStatsToFrame(); await sendVectorStatsToFrame();
@@ -493,6 +499,57 @@ async function handleGenerateVectors(vectorCfg) {
xbLog.info(MODULE_ID, `向量生成完成: L1=${l1Vectors.filter(Boolean).length}, L2=${l2VectorItems.length}`); xbLog.info(MODULE_ID, `向量生成完成: L1=${l1Vectors.filter(Boolean).length}, L2=${l2VectorItems.length}`);
} }
// ═══════════════════════════════════════════════════════════════════════════
// 向量完整性检测(仅提醒,不自动操作)
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Check whether the stored vector data still covers the current chat and, if
 * it does not, show a warning through the `/echo` slash command. Nothing is
 * regenerated automatically.
 *
 * Reported issues:
 *   - the stored fingerprint differs from the current engine fingerprint;
 *   - chat floors beyond `meta.lastChunkFloor` have not been chunked;
 *   - there are more summary events than stored event vectors.
 *
 * The check is skipped when vectors are disabled, when no chat is open, when
 * there are no summary events, or when a warning was already shown within
 * `VECTOR_WARNING_COOLDOWN_MS`.
 *
 * This is a best-effort reminder that callers fire without awaiting, so any
 * failure is caught and logged instead of surfacing as an unhandled rejection.
 *
 * @returns {Promise<void>}
 */
async function checkVectorIntegrityAndWarn() {
    try {
        const vectorCfg = getVectorConfig();
        if (!vectorCfg?.enabled) return;

        // Throttle: do not repeat the reminder within the cooldown window
        const now = Date.now();
        if (now - lastVectorWarningAt < VECTOR_WARNING_COOLDOWN_MS) return;

        const { chat, chatId } = getContext();
        if (!chatId || !chat?.length) return;

        const store = getSummaryStore();
        const totalFloors = chat.length;
        const totalEvents = store?.json?.events?.length || 0;

        // Without summary data there is nothing that needs vectors
        if (totalEvents === 0) return;

        const meta = await getMeta(chatId);
        const stats = await getStorageStats(chatId);
        const fingerprint = getEngineFingerprint(vectorCfg);

        const issues = [];

        // Fingerprint mismatch
        if (meta.fingerprint && meta.fingerprint !== fingerprint) {
            issues.push('向量引擎/模型已变更');
        }

        // L1 incomplete: floors after the last chunked floor
        const chunkFloorGap = totalFloors - 1 - (meta.lastChunkFloor ?? -1);
        if (chunkFloorGap > 0) {
            issues.push(`${chunkFloorGap} 层片段未向量化`);
        }

        // L2 incomplete: events without a stored vector
        const eventVectorGap = totalEvents - stats.eventVectors;
        if (eventVectorGap > 0) {
            issues.push(`${eventVectorGap} 个事件未向量化`);
        }

        if (issues.length > 0) {
            lastVectorWarningAt = now;
            await executeSlashCommand(`/echo severity=warning 向量数据不完整:${issues.join('、')}。请打开剧情总结面板点击"生成向量"。`);
        }
    } catch (err) {
        // Reminder only: a storage or command failure must not escape as an
        // unhandled promise rejection from the fire-and-forget call site.
        console.warn('[checkVectorIntegrityAndWarn] integrity check failed', err);
    }
}
async function handleClearVectors() { async function handleClearVectors() {
const { chatId } = getContext(); const { chatId } = getContext();
if (!chatId) return; if (!chatId) return;
@@ -918,6 +975,66 @@ function handleFrameMessage(event) {
try { vectorAbortController?.abort?.(); } catch {} try { vectorAbortController?.abort?.(); } catch {}
break; break;
case "VECTOR_EXPORT":
(async () => {
try {
const result = await exportVectors((status) => {
postToFrame({ type: "VECTOR_IO_STATUS", status });
});
postToFrame({
type: "VECTOR_EXPORT_RESULT",
success: true,
filename: result.filename,
size: result.size,
chunkCount: result.chunkCount,
eventCount: result.eventCount,
});
} catch (e) {
postToFrame({ type: "VECTOR_EXPORT_RESULT", success: false, error: e.message });
}
})();
break;
case "VECTOR_IMPORT_PICK":
// 在 parent 创建 file picker避免 iframe 传大文件
(async () => {
const input = document.createElement("input");
input.type = "file";
input.accept = ".zip";
input.onchange = async () => {
const file = input.files?.[0];
if (!file) {
postToFrame({ type: "VECTOR_IMPORT_RESULT", success: false, error: "未选择文件" });
return;
}
try {
const result = await importVectors(file, (status) => {
postToFrame({ type: "VECTOR_IO_STATUS", status });
});
postToFrame({
type: "VECTOR_IMPORT_RESULT",
success: true,
chunkCount: result.chunkCount,
eventCount: result.eventCount,
warnings: result.warnings,
fingerprintMismatch: result.fingerprintMismatch,
});
await sendVectorStatsToFrame();
} catch (e) {
postToFrame({ type: "VECTOR_IMPORT_RESULT", success: false, error: e.message });
}
};
input.click();
})();
break;
case "REQUEST_VECTOR_STATS":
sendVectorStatsToFrame();
break;
case "REQUEST_CLEAR": { case "REQUEST_CLEAR": {
const { chat, chatId } = getContext(); const { chat, chatId } = getContext();
clearSummaryData(chatId); clearSummaryData(chatId);
@@ -1051,6 +1168,9 @@ async function handleChatChanged() {
await sendFrameBaseData(store, newLength); await sendFrameBaseData(store, newLength);
sendFrameFullData(store, newLength); sendFrameFullData(store, newLength);
} }
// 检测向量完整性并提醒(仅提醒,不自动操作)
setTimeout(() => checkVectorIntegrityAndWarn(), 2000);
} }
async function handleMessageDeleted() { async function handleMessageDeleted() {

View File

@@ -18,6 +18,7 @@ import {
} from './chunk-store.js'; } from './chunk-store.js';
import { embed, getEngineFingerprint } from './embedder.js'; import { embed, getEngineFingerprint } from './embedder.js';
import { xbLog } from '../../../core/debug-core.js'; import { xbLog } from '../../../core/debug-core.js';
import { filterText } from './text-filter.js';
const MODULE_ID = 'chunk-builder'; const MODULE_ID = 'chunk-builder';
@@ -47,9 +48,9 @@ export function chunkMessage(floor, message, maxTokens = CHUNK_MAX_TOKENS) {
const speaker = message.name || (message.is_user ? '用户' : '角色'); const speaker = message.name || (message.is_user ? '用户' : '角色');
const isUser = !!message.is_user; const isUser = !!message.is_user;
const cleanText = text // 1. 应用用户自定义过滤规则
.replace(/<think>[\s\S]*?<\/think>/gi, '') // 2. 移除 TTS 标记(硬编码)
.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '') const cleanText = filterText(text)
.replace(/\[tts:[^\]]*\]/gi, '') .replace(/\[tts:[^\]]*\]/gi, '')
.trim(); .trim();

View File

@@ -11,6 +11,7 @@ import { embed, getEngineFingerprint } from './embedder.js';
import { xbLog } from '../../../core/debug-core.js'; import { xbLog } from '../../../core/debug-core.js';
import { getContext } from '../../../../../../extensions.js'; import { getContext } from '../../../../../../extensions.js';
import { getSummaryStore } from '../data/store.js'; import { getSummaryStore } from '../data/store.js';
import { filterText } from './text-filter.js';
const MODULE_ID = 'recall'; const MODULE_ID = 'recall';
@@ -139,12 +140,10 @@ function normalize(s) {
return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim(); return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim();
} }
function stripNoise(text) { function cleanForRecall(text) {
return String(text || '') // 1. 应用用户自定义过滤规则
.replace(/<think>[\s\S]*?<\/think>/gi, '') // 2. 移除 TTS 标记(硬编码)
.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '') return filterText(text).replace(/\[tts:[^\]]*\]/gi, '').trim();
.replace(/\[tts:[^\]]*\]/gi, '')
.trim();
} }
function buildExpDecayWeights(n, beta) { function buildExpDecayWeights(n, beta) {
@@ -180,7 +179,7 @@ function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = nul
return messages.slice(-count).map((m, idx, arr) => { return messages.slice(-count).map((m, idx, arr) => {
const speaker = m.name || (m.is_user ? '用户' : '角色'); const speaker = m.name || (m.is_user ? '用户' : '角色');
const clean = stripNoise(m.mes); const clean = cleanForRecall(m.mes);
if (!clean) return ''; if (!clean) return '';
const limit = idx === arr.length - 1 ? CONFIG.QUERY_MAX_CHARS : CONFIG.QUERY_CONTEXT_CHARS; const limit = idx === arr.length - 1 ? CONFIG.QUERY_MAX_CHARS : CONFIG.QUERY_CONTEXT_CHARS;
return `${speaker}: ${clean.slice(0, limit)}`; return `${speaker}: ${clean.slice(0, limit)}`;
@@ -773,7 +772,7 @@ export function buildQueryText(chat, count = 2, excludeLastAi = false) {
messages = messages.slice(0, -1); messages = messages.slice(0, -1);
} }
return messages.slice(-count).map(m => { return messages.slice(-count).map(m => {
const text = cleanForRecall(m.mes); const text = cleanForRecall(m.mes);
const speaker = m.name || (m.is_user ? '用户' : '角色'); const speaker = m.name || (m.is_user ? '用户' : '角色');
return `${speaker}: ${text.slice(0, 500)}`; return `${speaker}: ${text.slice(0, 500)}`;

View File

@@ -0,0 +1,63 @@
// ═══════════════════════════════════════════════════════════════════════════
// Text Filter - 通用文本过滤
// 跳过用户定义的「起始→结束」区间
// ═══════════════════════════════════════════════════════════════════════════
import { getTextFilterRules } from '../data/config.js';
/**
 * Escape every regular-expression metacharacter in a string so the result can
 * be embedded in a RegExp source and matched literally.
 *
 * @param {string} str - Raw text.
 * @returns {string} The text with each metacharacter prefixed by a backslash.
 */
function escapeRegex(str) {
    const metaChars = /[.*+?^${}()|[\]\\]/g;
    return str.replace(metaChars, (ch) => `\\${ch}`);
}
/**
 * Apply "start -> end" filter rules to a text, in order. Marker matching is
 * case-insensitive.
 *
 * Per rule:
 *   - start and end:   every start...end span is removed (markers included,
 *                      shortest match);
 *   - end only:        everything from the beginning through the first end
 *                      marker is removed;
 *   - start only:      everything from the first start marker to the end of the
 *                      text is removed;
 *   - neither:         the rule is skipped.
 *
 * @param {*} text - Text to filter. `null`/`undefined` yield an empty string so
 *   callers can always chain string methods on the result; other values are
 *   converted with `String()`.
 * @param {Array<{start?: string, end?: string}>|null|undefined} rules - Rules to apply.
 * @returns {string} The filtered text, trimmed when at least one rule list
 *   entry was processed; the unmodified string when there are no rules.
 */
export function applyTextFilterRules(text, rules) {
    if (text == null) return '';

    let result = String(text);
    if (!result || !rules?.length) return result;

    for (const rule of rules) {
        const start = rule?.start ?? '';
        const end = rule?.end ?? '';
        if (!start && !end) continue;

        if (start && end) {
            // Standard span: remove start...end including both markers (non-greedy)
            const span = new RegExp(`${escapeRegex(start)}[\\s\\S]*?${escapeRegex(end)}`, 'gi');
            result = result.replace(span, '');
        } else if (start) {
            // From the start marker to the end of the text. The marker is located
            // with a case-insensitive regex on the text itself: indices taken
            // from a lowercased copy are wrong when lowercasing changes length.
            const hit = new RegExp(escapeRegex(start), 'i').exec(result);
            if (hit) {
                result = result.slice(0, hit.index);
            }
        } else {
            // From the beginning of the text through the end marker
            const hit = new RegExp(escapeRegex(end), 'i').exec(result);
            if (hit) {
                result = result.slice(hit.index + hit[0].length);
            }
        }
    }

    return result.trim();
}
/**
 * Convenience wrapper: filter a text with the rules returned by
 * `getTextFilterRules()` at call time.
 *
 * @param {*} text - Text to filter.
 * @returns {*} Whatever `applyTextFilterRules` returns for that text.
 */
export function filterText(text) {
    const activeRules = getTextFilterRules();
    return applyTextFilterRules(text, activeRules);
}

View File

@@ -0,0 +1,301 @@
// ═══════════════════════════════════════════════════════════════════════════
// Vector Import/Export
// 向量数据导入导出(当前 chatId 级别)
// ═══════════════════════════════════════════════════════════════════════════
import { zipSync, unzipSync, strToU8, strFromU8 } from '../../../libs/fflate.mjs';
import { getContext } from '../../../../../../extensions.js';
import { xbLog } from '../../../core/debug-core.js';
import {
getMeta,
updateMeta,
getAllChunks,
getAllChunkVectors,
getAllEventVectors,
saveChunks,
saveChunkVectors,
clearAllChunks,
clearEventVectors,
saveEventVectors,
} from './chunk-store.js';
import { getEngineFingerprint } from './embedder.js';
import { getVectorConfig } from '../data/config.js';
const MODULE_ID = 'vector-io';
const EXPORT_VERSION = 1;
// ═══════════════════════════════════════════════════════════════════════════
// 工具函数
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Pack a list of vectors into one contiguous block of 32-bit floats.
 *
 * Exactly `dims` values are written per vector, in order; missing or falsy
 * components (including NaN) are stored as 0 and extra components are ignored.
 *
 * @param {Array<ArrayLike<number>>} vectors - Vectors to serialize.
 * @param {number} dims - Number of components written per vector.
 * @returns {Uint8Array} Byte view over the packed floats
 *   (`vectors.length * dims * 4` bytes).
 */
function float32ToBytes(vectors, dims) {
    const raw = new ArrayBuffer(vectors.length * dims * 4);
    const floats = new Float32Array(raw);

    vectors.forEach((vec, row) => {
        const base = row * dims;
        for (let col = 0; col < dims; col += 1) {
            floats[base + col] = vec[col] || 0;
        }
    });

    return new Uint8Array(raw);
}
/**
 * Split a block of packed 32-bit floats back into vectors of `dims` components.
 *
 * @param {Uint8Array} bytes - Packed float data; may be a view at any byte
 *   offset of its underlying buffer.
 * @param {number} dims - Components per vector; must be a positive integer
 *   unless `bytes` is empty.
 * @returns {number[][]} Vectors in storage order. Trailing bytes that do not
 *   form a whole float are ignored; a final partial group of floats is
 *   returned as a shorter vector.
 * @throws {RangeError} If `bytes` is non-empty and `dims` is not a positive integer.
 */
function bytesToFloat32(bytes, dims) {
    if (bytes.byteLength === 0) return [];

    // A zero step would never advance the loop below, and dims usually comes
    // from an imported manifest, so reject bad values instead of hanging.
    if (!Number.isInteger(dims) || dims <= 0) {
        throw new RangeError(`无效的向量维度: ${dims}`);
    }

    // Float32Array views require a 4-byte aligned offset; copy when the
    // incoming view does not start on such a boundary.
    const aligned = bytes.byteOffset % Float32Array.BYTES_PER_ELEMENT === 0 ? bytes : bytes.slice();
    const floatCount = Math.floor(aligned.byteLength / Float32Array.BYTES_PER_ELEMENT);
    const view = new Float32Array(aligned.buffer, aligned.byteOffset, floatCount);

    const vectors = [];
    for (let i = 0; i < view.length; i += dims) {
        vectors.push(Array.from(view.subarray(i, i + dims)));
    }
    return vectors;
}
/**
 * Trigger a browser download of a Blob under the given file name.
 *
 * A temporary anchor pointing at an object URL for the blob is attached to
 * the document body, clicked, detached again, and the object URL is revoked.
 *
 * @param {Blob} blob - Data to download.
 * @param {string} filename - Value for the anchor's `download` attribute.
 */
function downloadBlob(blob, filename) {
    const objectUrl = URL.createObjectURL(blob);
    const anchor = Object.assign(document.createElement('a'), {
        href: objectUrl,
        download: filename,
    });

    const { body } = document;
    body.appendChild(anchor);
    anchor.click();
    body.removeChild(anchor);

    URL.revokeObjectURL(objectUrl);
}
// ═══════════════════════════════════════════════════════════════════════════
// 导出
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Export the vector data of the currently open chat as a zip download.
 *
 * The archive contains:
 *   - manifest.json      version, export time, chatId, fingerprint, dims, counts, lastChunkFloor
 *   - chunks.jsonl       one JSON line of chunk metadata per chunk, sorted by chunkId
 *   - chunk_vectors.bin  packed float32 vectors in the same order as chunks.jsonl
 *   - events.jsonl       one JSON line ({ eventId }) per event vector, sorted by eventId
 *   - event_vectors.bin  packed float32 vectors in the same order as events.jsonl
 *
 * @param {(status: string) => void} [onProgress] - Optional callback invoked
 *   with a short status text before each stage.
 * @returns {Promise<{filename: string, size: number, chunkCount: number, eventCount: number}>}
 *   Name and byte size of the generated archive plus the exported counts.
 * @throws {Error} If no chat is open, there is nothing to export, or the
 *   vector dimension cannot be determined.
 */
export async function exportVectors(onProgress) {
const { chatId } = getContext();
if (!chatId) {
throw new Error('未打开聊天');
}
onProgress?.('读取数据...');
const meta = await getMeta(chatId);
const chunks = await getAllChunks(chatId);
const chunkVectors = await getAllChunkVectors(chatId);
const eventVectors = await getAllEventVectors(chatId);
if (chunks.length === 0 && eventVectors.length === 0) {
throw new Error('没有可导出的向量数据');
}
// Determine the vector dimension from the first available chunk or event vector
const dims = chunkVectors[0]?.vector?.length || eventVectors[0]?.vector?.length || 0;
if (dims === 0) {
throw new Error('无法确定向量维度');
}
onProgress?.('构建索引...');
// Build the chunk index (sorted by chunkId so the ordering is consistent)
const sortedChunks = [...chunks].sort((a, b) => a.chunkId.localeCompare(b.chunkId));
const chunkVectorMap = new Map(chunkVectors.map(cv => [cv.chunkId, cv.vector]));
// chunks.jsonl
const chunksJsonl = sortedChunks.map(c => JSON.stringify({
chunkId: c.chunkId,
floor: c.floor,
chunkIdx: c.chunkIdx,
speaker: c.speaker,
isUser: c.isUser,
text: c.text,
textHash: c.textHash,
})).join('\n');
// chunk_vectors.bin (in sortedChunks order); a chunk with no stored vector
// is written as an all-zero vector so both files keep the same length
const chunkVectorsOrdered = sortedChunks.map(c => chunkVectorMap.get(c.chunkId) || new Array(dims).fill(0));
onProgress?.('压缩向量...');
// Build the event index
const sortedEventVectors = [...eventVectors].sort((a, b) => a.eventId.localeCompare(b.eventId));
const eventsJsonl = sortedEventVectors.map(ev => JSON.stringify({
eventId: ev.eventId,
})).join('\n');
// event_vectors.bin
const eventVectorsOrdered = sortedEventVectors.map(ev => ev.vector);
// manifest
const manifest = {
version: EXPORT_VERSION,
exportedAt: Date.now(),
chatId,
fingerprint: meta.fingerprint || '',
dims,
chunkCount: sortedChunks.length,
chunkVectorCount: chunkVectors.length,
eventCount: sortedEventVectors.length,
lastChunkFloor: meta.lastChunkFloor ?? -1,
};
onProgress?.('打包文件...');
// Pack everything into a zip archive
const zipData = zipSync({
'manifest.json': strToU8(JSON.stringify(manifest, null, 2)),
'chunks.jsonl': strToU8(chunksJsonl),
'chunk_vectors.bin': float32ToBytes(chunkVectorsOrdered, dims),
'events.jsonl': strToU8(eventsJsonl),
'event_vectors.bin': float32ToBytes(eventVectorsOrdered, dims),
}, { level: 1 }); // low compression level: speed over size
onProgress?.('下载文件...');
// Build the file name: vectors_<first 8 chars of chatId>_<YYYYMMDD>.zip
const timestamp = new Date().toISOString().slice(0, 10).replace(/-/g, '');
const shortChatId = chatId.slice(0, 8);
const filename = `vectors_${shortChatId}_${timestamp}.zip`;
downloadBlob(new Blob([zipData]), filename);
const sizeMB = (zipData.byteLength / 1024 / 1024).toFixed(2);
xbLog.info(MODULE_ID, `导出完成: ${filename} (${sizeMB}MB)`);
return {
filename,
size: zipData.byteLength,
chunkCount: sortedChunks.length,
eventCount: sortedEventVectors.length,
};
}
// ═══════════════════════════════════════════════════════════════════════════
// 导入
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Import a vector archive (as produced by `exportVectors`) into the currently
 * open chat, replacing the chat's existing chunks and vectors.
 *
 * The archive is fully parsed and validated before any existing data is
 * cleared. A fingerprint or chatId that differs from the current one does not
 * abort the import; it is reported through `warnings`.
 *
 * @param {{arrayBuffer: () => Promise<ArrayBuffer>}} file - The selected zip
 *   file (a `File`/`Blob`).
 * @param {(status: string) => void} [onProgress] - Optional callback invoked
 *   with a short status text before each stage.
 * @returns {Promise<{chunkCount: number, eventCount: number, warnings: string[], fingerprintMismatch: boolean}>}
 *   Imported counts, human-readable warnings, and whether the archive's engine
 *   fingerprint differs from the current one.
 * @throws {Error} If no chat is open, the file cannot be unzipped, the
 *   manifest is missing or has an unsupported version or invalid `dims`, a
 *   vector payload is truncated, or metadata and vector counts disagree.
 */
export async function importVectors(file, onProgress) {
    const { chatId } = getContext();
    if (!chatId) {
        throw new Error('未打开聊天');
    }

    onProgress?.('读取文件...');
    const arrayBuffer = await file.arrayBuffer();
    const zipData = new Uint8Array(arrayBuffer);

    onProgress?.('解压文件...');
    let unzipped;
    try {
        unzipped = unzipSync(zipData);
    } catch {
        throw new Error('文件格式错误,无法解压');
    }

    // Read the manifest
    if (!unzipped['manifest.json']) {
        throw new Error('缺少 manifest.json');
    }
    const manifest = JSON.parse(strFromU8(unzipped['manifest.json']));
    if (manifest.version !== EXPORT_VERSION) {
        throw new Error(`不支持的版本: ${manifest.version}`);
    }

    onProgress?.('校验数据...');

    // dims drives how the binary payloads are split into vectors; a missing,
    // zero or fractional value would hang or corrupt the parsing below.
    const { dims } = manifest;
    if (!Number.isInteger(dims) || dims <= 0) {
        throw new Error(`无效的向量维度: ${dims}`);
    }

    // Fingerprint check (warn only)
    const vectorCfg = getVectorConfig();
    const currentFingerprint = vectorCfg ? getEngineFingerprint(vectorCfg) : '';
    const fingerprintMismatch = Boolean(
        manifest.fingerprint && currentFingerprint && manifest.fingerprint !== currentFingerprint
    );

    // chatId check (warn but allow)
    const chatIdMismatch = manifest.chatId !== chatId;

    const warnings = [];
    if (fingerprintMismatch) {
        warnings.push(`向量引擎不匹配(文件: ${manifest.fingerprint}, 当前: ${currentFingerprint}),导入后需重新生成`);
    }
    if (chatIdMismatch) {
        warnings.push(`聊天ID不匹配文件: ${manifest.chatId}, 当前: ${chatId}`);
    }

    onProgress?.('解析数据...');

    const chunkVectorsBytes = unzipped['chunk_vectors.bin'];
    const eventVectorsBytes = unzipped['event_vectors.bin'];

    // Every payload must hold a whole number of dims-sized float32 vectors;
    // otherwise the last vector would be silently truncated.
    const bytesPerVector = dims * 4;
    const payloads = [
        ['chunk_vectors.bin', chunkVectorsBytes],
        ['event_vectors.bin', eventVectorsBytes],
    ];
    for (const [name, bytes] of payloads) {
        if (bytes && bytes.byteLength % bytesPerVector !== 0) {
            throw new Error(`${name} 长度与向量维度不符`);
        }
    }

    // Parse chunks
    const chunksJsonl = unzipped['chunks.jsonl'] ? strFromU8(unzipped['chunks.jsonl']) : '';
    const chunkMetas = chunksJsonl.split('\n').filter(Boolean).map(line => JSON.parse(line));

    // Parse chunk vectors
    const chunkVectors = chunkVectorsBytes ? bytesToFloat32(chunkVectorsBytes, dims) : [];

    // Parse events
    const eventsJsonl = unzipped['events.jsonl'] ? strFromU8(unzipped['events.jsonl']) : '';
    const eventMetas = eventsJsonl.split('\n').filter(Boolean).map(line => JSON.parse(line));

    // Parse event vectors
    const eventVectors = eventVectorsBytes ? bytesToFloat32(eventVectorsBytes, dims) : [];

    // Metadata and vectors are paired by position, so the counts must agree
    if (chunkMetas.length !== chunkVectors.length) {
        throw new Error(`chunk 数量不匹配: 元数据 ${chunkMetas.length}, 向量 ${chunkVectors.length}`);
    }
    if (eventMetas.length !== eventVectors.length) {
        throw new Error(`event 数量不匹配: 元数据 ${eventMetas.length}, 向量 ${eventVectors.length}`);
    }

    onProgress?.('清空旧数据...');

    // Clear the current data (only reached once the archive validated)
    await clearAllChunks(chatId);
    await clearEventVectors(chatId);

    onProgress?.('写入数据...');

    // Write chunks and their vectors
    if (chunkMetas.length > 0) {
        const chunksToSave = chunkMetas.map(meta => ({
            chunkId: meta.chunkId,
            floor: meta.floor,
            chunkIdx: meta.chunkIdx,
            speaker: meta.speaker,
            isUser: meta.isUser,
            text: meta.text,
            textHash: meta.textHash,
        }));
        await saveChunks(chatId, chunksToSave);

        const chunkVectorItems = chunkMetas.map((meta, idx) => ({
            chunkId: meta.chunkId,
            vector: chunkVectors[idx],
        }));
        await saveChunkVectors(chatId, chunkVectorItems, manifest.fingerprint);
    }

    // Write event vectors
    if (eventMetas.length > 0) {
        const eventVectorItems = eventMetas.map((meta, idx) => ({
            eventId: meta.eventId,
            vector: eventVectors[idx],
        }));
        await saveEventVectors(chatId, eventVectorItems, manifest.fingerprint);
    }

    // Update meta; fall back to -1 (the exporter's "nothing chunked" value)
    // when the manifest carries no lastChunkFloor.
    await updateMeta(chatId, {
        fingerprint: manifest.fingerprint,
        lastChunkFloor: manifest.lastChunkFloor ?? -1,
    });

    xbLog.info(MODULE_ID, `导入完成: ${chunkMetas.length} chunks, ${eventMetas.length} events`);

    return {
        chunkCount: chunkMetas.length,
        eventCount: eventMetas.length,
        warnings,
        fingerprintMismatch,
    };
}