Update story summary recall and prompt

This commit is contained in:
2026-02-05 00:22:02 +08:00
parent 12db08abe0
commit 8137e206f9
18 changed files with 708 additions and 406 deletions

View File

@@ -3,7 +3,7 @@
// 标准 RAG chunking: ~200 tokens per chunk
// ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../extensions.js';
import { getContext } from '../../../../../../../extensions.js';
import {
getMeta,
updateMeta,
@@ -15,10 +15,10 @@ import {
makeChunkId,
hashText,
CHUNK_MAX_TOKENS,
} from './chunk-store.js';
import { embed, getEngineFingerprint } from './embedder.js';
import { xbLog } from '../../../core/debug-core.js';
import { filterText } from './text-filter.js';
} from '../storage/chunk-store.js';
import { embed, getEngineFingerprint } from '../utils/embedder.js';
import { xbLog } from '../../../../core/debug-core.js';
import { filterText } from '../utils/text-filter.js';
const MODULE_ID = 'chunk-builder';
@@ -339,7 +339,7 @@ export async function syncOnMessageReceived(chatId, lastFloor, message, vectorCo
// 本地模型未加载时跳过(避免意外触发下载或报错)
if (vectorConfig.engine === "local") {
const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("./embedder.js");
const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("../utils/embedder.js");
const modelId = vectorConfig.local?.modelId || DEFAULT_LOCAL_MODEL;
if (!isLocalModelLoaded(modelId)) return;
}

View File

@@ -3,8 +3,8 @@
// 事件监听 + 回滚钩子注册
// ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../extensions.js';
import { xbLog } from '../../../core/debug-core.js';
import { getContext } from '../../../../../../../extensions.js';
import { xbLog } from '../../../../core/debug-core.js';
import {
saveStateAtoms,
saveStateVectors,
@@ -12,9 +12,9 @@ import {
deleteStateVectorsFromFloor,
getStateAtoms,
clearStateVectors,
} from './state-store.js';
import { embed, getEngineFingerprint } from './embedder.js';
import { getVectorConfig } from '../data/config.js';
} from '../storage/state-store.js';
import { embed, getEngineFingerprint } from '../utils/embedder.js';
import { getVectorConfig } from '../../data/config.js';
const MODULE_ID = 'state-integration';

View File

@@ -3,11 +3,11 @@
// L0 语义锚点召回 + floor bonus + 虚拟 chunk 转换
// ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../extensions.js';
import { getAllStateVectors, getStateAtoms } from './state-store.js';
import { getMeta } from './chunk-store.js';
import { getEngineFingerprint } from './embedder.js';
import { xbLog } from '../../../core/debug-core.js';
import { getContext } from '../../../../../../../extensions.js';
import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js';
import { getMeta } from '../storage/chunk-store.js';
import { getEngineFingerprint } from '../utils/embedder.js';
import { xbLog } from '../../../../core/debug-core.js';
const MODULE_ID = 'state-recall';

View File

@@ -1,4 +1,4 @@
// Story Summary - Recall Engine
// Story Summary - Recall Engine
// L1 chunk + L2 event 召回
// - 全量向量打分
// - 实体权重归一化分配
@@ -8,19 +8,19 @@
// - MMR 去重(融合后执行)
// - floor 稀疏去重
import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js';
import { embed, getEngineFingerprint } from './embedder.js';
import { xbLog } from '../../../core/debug-core.js';
import { getContext } from '../../../../../../extensions.js';
import { getSummaryStore, getFacts, getNewCharacters, isRelationFact } from '../data/store.js';
import { filterText } from './text-filter.js';
import { getAllChunks, getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from '../storage/chunk-store.js';
import { embed, getEngineFingerprint } from '../utils/embedder.js';
import { xbLog } from '../../../../core/debug-core.js';
import { getContext } from '../../../../../../../extensions.js';
import { getSummaryStore, getFacts, getNewCharacters, isRelationFact } from '../../data/store.js';
import { filterText } from '../utils/text-filter.js';
import {
searchStateAtoms,
buildL0FloorBonus,
stateToVirtualChunks,
mergeAndSparsify,
} from './state-recall.js';
import { ensureEventTextIndex, searchEventsByText } from './text-search.js';
} from '../pipeline/state-recall.js';
import { ensureEventTextIndex, searchEventsByText, ensureChunkTextIndex, searchChunksByText } from './text-search.js';
import {
extractRareTerms,
extractNounsFromFactsO,
@@ -29,10 +29,8 @@ import {
const MODULE_ID = 'recall';
const CONFIG = {
QUERY_MSG_COUNT: 5,
QUERY_DECAY_BETA: 0.7,
QUERY_MAX_CHARS: 600,
QUERY_CONTEXT_CHARS: 240,
QUERY_MSG_COUNT: 3,
QUERY_DECAY_BETA: 0.6,
CAUSAL_CHAIN_MAX_DEPTH: 10,
CAUSAL_INJECT_MAX: 30,
@@ -216,11 +214,26 @@ function extractRelationTarget(p) {
return '';
}
/**
 * Position-only exponential-decay weights for `n` query segments.
 *
 * The last segment gets the largest raw weight (`exp(0) = 1`); each step
 * further back is multiplied by `exp(-beta)`. The result is normalised so the
 * weights sum to 1.
 *
 * @param {number} n - Number of segments.
 * @param {number} beta - Decay rate; larger values favour recent segments more.
 * @returns {number[]} Normalised weights, oldest segment first.
 */
function buildExpDecayWeights(n, beta) {
    const last = n - 1;
    const w = Array.from({ length: n }, (_, i) => Math.exp(beta * (i - last)));
    // `|| 1` keeps the division well-defined when every weight underflows to 0.
    const sum = w.reduce((a, b) => a + b, 0) || 1;
    return w.map(x => x / sum);
}

/**
 * Content-aware variant of the exponential-decay weighting.
 *
 * Each segment's positional weight `exp(beta * (i - last))` is multiplied by a
 * content factor derived from its length with all whitespace removed:
 *   - 15 or more characters  -> factor 1.0
 *   - fewer than 15          -> `sqrt(len / 15)`, floored at 0.3
 * so a very short latest message does not dominate the blended query.
 * The weights are normalised to sum to 1.
 *
 * @param {Array<string>} segments - Query segments, oldest first. A nullish
 *   value is treated as an empty list.
 * @param {number} [beta=0.6] - Positional decay rate.
 * @returns {number[]} One weight per segment; `[]` for no segments and `[1.0]`
 *   for a single segment.
 */
function buildContentAwareWeights(segments, beta = 0.6) {
    const n = segments?.length ?? 0;
    if (n === 0) return [];
    if (n === 1) return [1.0];

    const last = n - 1;
    const SHORT_THRESHOLD = 15;

    const raw = Array.from({ length: n }, (_, i) => {
        const posWeight = Math.exp(beta * (i - last));
        // Whitespace carries no signal, so it is excluded from the length.
        const len = String(segments[i] || '').replace(/\s+/g, '').length;
        const contentFactor = len >= SHORT_THRESHOLD
            ? 1.0
            : Math.max(0.3, Math.sqrt(len / SHORT_THRESHOLD));
        return posWeight * contentFactor;
    });

    const sum = raw.reduce((a, b) => a + b, 0) || 1;
    return raw.map(w => w / sum);
}
// ═══════════════════════════════════════════════════════════════════════════
@@ -247,19 +260,16 @@ function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = nul
}
}
return messages.slice(-count).map((m, idx, arr) => {
const speaker = m.name || (m.is_user ? (name1 || "用户") : "角色");
const clean = cleanForRecall(m.mes);
if (!clean) return '';
const limit = idx === arr.length - 1 ? CONFIG.QUERY_MAX_CHARS : CONFIG.QUERY_CONTEXT_CHARS;
return `${speaker}: ${clean.slice(0, limit)}`;
}).filter(Boolean);
return messages.slice(-count)
.map((m) => cleanForRecall(m.mes) || '')
.filter(Boolean);
}
async function embedWeightedQuery(segments, vectorConfig) {
if (!segments?.length) return null;
const weights = buildExpDecayWeights(segments.length, CONFIG.QUERY_DECAY_BETA);
const weights = buildContentAwareWeights(segments, CONFIG.QUERY_DECAY_BETA);
const vecs = await embed(segments, vectorConfig);
const dims = vecs?.[0]?.length || 0;
if (!dims) return null;
@@ -377,19 +387,6 @@ function expandByFacts(presentEntities, facts, maxDepth = 2) {
// 实体权重归一化(用于加分分配)
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Rescales entity weights so that they sum to 1.
 *
 * @param {Map<string, number>} queryEntityWeights - Raw weight per entity.
 * @returns {Map<string, number>} A new Map with each weight divided by the
 *   total; an empty Map when the input is missing/empty or the total is not
 *   positive.
 */
function normalizeEntityWeights(queryEntityWeights) {
    const result = new Map();
    if (!queryEntityWeights?.size) return result;

    let total = 0;
    for (const weight of queryEntityWeights.values()) {
        total += weight;
    }
    if (total <= 0) return result;

    queryEntityWeights.forEach((weight, entity) => {
        result.set(entity, weight / total);
    });
    return result;
}
// ═══════════════════════════════════════════════════════════════════════════
// 文本路 Query 构建(分层高信号词)
// ═══════════════════════════════════════════════════════════════════════════
@@ -548,7 +545,167 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
// L1 Chunks 检索
// ═══════════════════════════════════════════════════════════════════════════
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1) {
/**
 * Recalls L2 events for the current chat by fusing a vector ranking with a
 * text ranking (RRF), then de-duplicating with MMR.
 *
 * Pipeline, in order:
 *   1. Bail out with `[]` when there is no chat id, no query vector, the stored
 *      meta fingerprint differs from the current engine fingerprint, or no
 *      event vectors exist.
 *   2. Ensure the event text index is current and run the text search.
 *   3. Score every event: cosine similarity plus the L0 bonus of the first
 *      floor in the event's floor range that has one.
 *   4. Keep events at or above `CONFIG.MIN_SIMILARITY_EVENT`, best first,
 *      capped at `CONFIG.CANDIDATE_EVENTS`.
 *   5. Fuse with the text ranking; drop TEXT-only hits whose raw similarity is
 *      below `CONFIG.TEXT_SOFT_MIN_SIM`.
 *   6. MMR-select up to `CONFIG.MAX_EVENTS`, then cap TEXT-only hits at
 *      `CONFIG.TEXT_TOTAL_MAX`.
 *
 * Diagnostic statistics are attached to the first result only
 * (`_preFilterDistribution`, `_rrfStats`, `_textOnlyStats`, `_textGapInfo`).
 *
 * @param {number[]} queryVector - Embedded query.
 * @param {string} queryTextForSearch - Query string for the text path.
 * @param {Array<object>} allEvents - Events to rank; nullish is treated as `[]`.
 * @param {object} vectorConfig - Passed to `getEngineFingerprint`.
 * @param {object} store - Summary store; only `updatedAt` is read here.
 * @param {{has: function(string): boolean}} queryEntitySet - Normalised query
 *   entities; an event with a matching participant is tagged `DIRECT`.
 *   Nullish is treated as empty.
 * @param {Map<number, number>} [l0FloorBonus] - Bonus per floor from L0 recall.
 * @returns {Promise<Array<object>>} Ranked results
 *   `{ event, similarity, _recallType, _recallReason, _rrfDetail, _rawSim }`.
 */
async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntitySet, l0FloorBonus = new Map()) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length) return [];

    // Normalise optional inputs once so every later use agrees on them.
    const events = allEvents || [];
    const presentEntities = queryEntitySet ?? new Set();
    const floorBonus = l0FloorBonus ?? new Map();

    const meta = await getMeta(chatId);
    const fp = getEngineFingerprint(vectorConfig);
    if (meta?.fingerprint && meta.fingerprint !== fp) return [];

    const eventVectors = await getAllEventVectors(chatId);
    const vectorMap = new Map((eventVectors || []).map(v => [v.eventId, v.vector]));
    if (!vectorMap.size) return [];

    // Build / refresh the text index.
    const revision = `${chatId}:${store?.updatedAt || 0}:${events.length}`;
    ensureEventTextIndex(events, revision);

    // Text path.
    const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
    const textGapInfo = textRanked._gapInfo || null;

    // Vector path.
    const scored = events.map((event, idx) => {
        const v = vectorMap.get(event.id);
        const rawSim = v ? cosineSimilarity(queryVector, v) : 0;

        // L0 bonus: only the first floor in the event's range that has one counts.
        let bonus = 0;
        const range = parseFloorRange(event.summary);
        if (range) {
            for (let f = range.start; f <= range.end; f++) {
                if (floorBonus.has(f)) {
                    bonus += floorBonus.get(f);
                    break;
                }
            }
        }

        const participants = (event.participants || []).map(p => normalize(p));
        const hasPresent = participants.some(p => presentEntities.has(p));

        return {
            _id: event.id,
            _idx: idx,
            event,
            rawSim,
            finalScore: rawSim + bonus,
            vector: v,
            _hasPresent: hasPresent,
        };
    });

    const rawSimById = new Map(scored.map(s => [s._id, s.rawSim]));
    const hasPresentById = new Map(scored.map(s => [s._id, s._hasPresent]));

    // Score histogram, built in one pass. The final branch is an explicit
    // `< 0.5` so a NaN score lands in no bucket.
    const preFilterDistribution = {
        total: scored.length,
        '0.85+': 0,
        '0.7-0.85': 0,
        '0.6-0.7': 0,
        '0.5-0.6': 0,
        '<0.5': 0,
        passThreshold: 0,
        threshold: CONFIG.MIN_SIMILARITY_EVENT,
    };
    for (const { finalScore } of scored) {
        if (finalScore >= 0.85) preFilterDistribution['0.85+']++;
        else if (finalScore >= 0.7) preFilterDistribution['0.7-0.85']++;
        else if (finalScore >= 0.6) preFilterDistribution['0.6-0.7']++;
        else if (finalScore >= 0.5) preFilterDistribution['0.5-0.6']++;
        else if (finalScore < 0.5) preFilterDistribution['<0.5']++;

        if (finalScore >= CONFIG.MIN_SIMILARITY_EVENT) preFilterDistribution.passThreshold++;
    }

    const candidates = scored
        .filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
        .sort((a, b) => b.finalScore - a.finalScore)
        .slice(0, CONFIG.CANDIDATE_EVENTS);

    const vectorRanked = candidates.map(s => ({
        event: s.event,
        similarity: s.finalScore,
        rawSim: s.rawSim,
        vector: s.vector,
    }));

    const eventById = new Map(events.map(e => [e.id, e]));
    const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);

    // Quality gate for TEXT-only hits.
    const textOnlyStats = {
        total: 0,
        passedSoftCheck: 0,
        filtered: 0,
        finalIncluded: 0,
        truncatedByLimit: 0,
    };

    const filtered = fused.filter(x => {
        if (x.type !== 'TEXT') return true;

        textOnlyStats.total++;
        const sim = x.rawSim || rawSimById.get(x.id) || 0;
        if (sim >= CONFIG.TEXT_SOFT_MIN_SIM) {
            textOnlyStats.passedSoftCheck++;
            return true;
        }
        textOnlyStats.filtered++;
        return false;
    });

    const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
        ...x,
        _id: x.id,
    }));

    const mmrOutput = mmrSelect(
        mmrInput,
        CONFIG.MAX_EVENTS,
        CONFIG.MMR_LAMBDA,
        c => c.vector || null,
        c => c.rrf
    );

    // TEXT-only quota, applied after MMR.
    const finalResults = mmrOutput.filter(x => {
        if (x.type !== 'TEXT') return true;
        if (textOnlyStats.finalIncluded < CONFIG.TEXT_TOTAL_MAX) {
            textOnlyStats.finalIncluded++;
            return true;
        }
        textOnlyStats.truncatedByLimit++;
        return false;
    });

    const results = finalResults.map(x => ({
        event: x.event,
        similarity: x.rrf,
        _recallType: hasPresentById.get(x.event?.id) ? 'DIRECT' : 'SIMILAR',
        _recallReason: x.type,
        _rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
        _rawSim: rawSimById.get(x.event?.id) || 0,
    }));

    // Statistics ride along on the first result.
    if (results.length > 0) {
        results[0]._preFilterDistribution = preFilterDistribution;
        results[0]._rrfStats = {
            vectorCount: vectorRanked.length,
            textCount: textRanked.length,
            hybridCount: fused.filter(x => x.type === 'HYBRID').length,
            vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
            textOnlyTotal: textOnlyStats.total,
        };
        results[0]._textOnlyStats = textOnlyStats;
        results[0]._textGapInfo = textGapInfo;
    }

    return results;
}
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1, textSearchParams = null) {
const { chatId } = getContext();
if (!chatId || !queryVector?.length) return [];
@@ -577,6 +734,58 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
};
});
// 文本路补充(仅待整理区)
let textL1Stats = null;
const store = getSummaryStore();
const keepVisible = store?.keepVisibleCount ?? 3;
const recentStart = lastSummarizedFloor + 1;
const recentEnd = (meta?.lastChunkFloor ?? -1) - keepVisible;
if (textSearchParams && recentEnd >= recentStart && recentEnd >= 0) {
const { queryEntities, rareTerms } = textSearchParams;
const textQuery = [...(queryEntities || []), ...(rareTerms || [])].join(' ');
if (textQuery.trim()) {
const allChunks = await getAllChunks(chatId);
const recentChunks = allChunks.filter(c => c.floor >= recentStart && c.floor <= recentEnd);
if (recentChunks.length > 0) {
const revision = `${chatId}:chunk:${recentEnd}`;
ensureChunkTextIndex(recentChunks, revision);
const textHits = searchChunksByText(textQuery, recentStart, recentEnd, 20);
textL1Stats = {
range: `${recentStart + 1}~${recentEnd + 1}`,
candidates: recentChunks.length,
hits: textHits.length,
};
for (const hit of textHits) {
const existingIdx = scored.findIndex(s => s.chunkId === hit.chunkId);
if (existingIdx >= 0) {
scored[existingIdx]._hasTextHit = true;
scored[existingIdx]._textRank = hit.textRank;
} else {
scored.push({
_id: hit.chunkId,
chunkId: hit.chunkId,
floor: hit.floor,
chunkIdx: 0,
similarity: CONFIG.MIN_SIMILARITY_CHUNK_RECENT,
_baseSimilarity: 0,
_l0Bonus: 0,
_recallReason: 'TEXT_L1',
_textRank: hit.textRank,
vector: null,
});
}
}
}
}
}
const candidates = scored
.filter(s => {
const threshold = s.floor > lastSummarizedFloor
@@ -599,6 +808,7 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
'0.55-0.6': scored.filter(s => s.similarity >= 0.55 && s.similarity < 0.6).length,
'<0.55': scored.filter(s => s.similarity < 0.55).length,
},
textL1: textL1Stats,
};
const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length);
@@ -636,6 +846,8 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
isUser: chunk.isUser,
text: chunk.text,
similarity: item.similarity,
_recallReason: item._recallReason,
_textRank: item._textRank,
};
}).filter(Boolean);
@@ -646,184 +858,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
return results;
}
// ═══════════════════════════════════════════════════════════════════════════
// L2 Events retrieval (RRF hybrid + MMR applied afterwards)
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Pre-commit version of the L2 event search (the side of the diff that this
 * commit replaces). It differs from its replacement by taking a Map of
 * normalised entity weights and adding an entity bonus to each event's score.
 *
 * Returns `[]` when there is no chat id, no query vector, the stored meta
 * fingerprint differs from the current engine fingerprint, or no event
 * vectors exist.
 *
 * @param {number[]} queryVector - Embedded query.
 * @param {string} queryTextForSearch - Query string for the text path.
 * @param {Array<object>} allEvents - Events to rank.
 * @param {object} vectorConfig - Passed to `getEngineFingerprint`.
 * @param {object} store - Summary store; only `updatedAt` is read here.
 * @param {Map<string, number>} normalizedEntityWeights - Weight per normalised
 *   entity name; the largest weight among an event's participants scales the
 *   entity bonus.
 * @param {Map<number, number>} [l0FloorBonus] - Bonus per floor from L0 recall.
 * @returns {Promise<Array<object>>} Ranked results; diagnostic stats are
 *   attached to the first element only.
 */
async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, normalizedEntityWeights, l0FloorBonus = new Map()) {
const { chatId } = getContext();
if (!chatId || !queryVector?.length) return [];
const meta = await getMeta(chatId);
const fp = getEngineFingerprint(vectorConfig);
if (meta.fingerprint && meta.fingerprint !== fp) return [];
const eventVectors = await getAllEventVectors(chatId);
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
if (!vectorMap.size) return [];
// Build / refresh the text index
const revision = `${chatId}:${store?.updatedAt || 0}:${allEvents.length}`;
ensureEventTextIndex(allEvents, revision);
// Text-path retrieval
const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
const textGapInfo = textRanked._gapInfo || null;
// ═══════════════════════════════════════════════════════════════════════
// Vector-path retrieval. The original comment said "L0 weighting only",
// but an entity bonus is also added to the score below.
// ═══════════════════════════════════════════════════════════════════════
const ENTITY_BONUS_POOL = 0.10;
const scored = (allEvents || []).map((event, idx) => {
const v = vectorMap.get(event.id);
const rawSim = v ? cosineSimilarity(queryVector, v) : 0;
let bonus = 0;
// L0 weighting: only the first floor in the range that has a bonus counts
const range = parseFloorRange(event.summary);
if (range) {
for (let f = range.start; f <= range.end; f++) {
if (l0FloorBonus.has(f)) {
bonus += l0FloorBonus.get(f);
break;
}
}
}
const participants = (event.participants || []).map(p => normalize(p));
// Entity bonus is driven by the single highest-weighted participant
let maxEntityWeight = 0;
for (const p of participants) {
const w = normalizedEntityWeights.get(p) || 0;
if (w > maxEntityWeight) {
maxEntityWeight = w;
}
}
const entityBonus = ENTITY_BONUS_POOL * maxEntityWeight;
bonus += entityBonus;
return {
_id: event.id,
_idx: idx,
event,
rawSim,
finalScore: rawSim + bonus,
vector: v,
_entityBonus: entityBonus,
_hasPresent: maxEntityWeight > 0,
};
});
// Lookups by event id, used after fusion to recover per-event values
const rawSimById = new Map(scored.map(s => [s._id, s.rawSim]));
const entityBonusById = new Map(scored.map(s => [s._id, s._entityBonus]));
const hasPresentById = new Map(scored.map(s => [s._id, s._hasPresent]));
// Histogram of final scores before thresholding (diagnostics only)
const preFilterDistribution = {
total: scored.length,
'0.85+': scored.filter(s => s.finalScore >= 0.85).length,
'0.7-0.85': scored.filter(s => s.finalScore >= 0.7 && s.finalScore < 0.85).length,
'0.6-0.7': scored.filter(s => s.finalScore >= 0.6 && s.finalScore < 0.7).length,
'0.5-0.6': scored.filter(s => s.finalScore >= 0.5 && s.finalScore < 0.6).length,
'<0.5': scored.filter(s => s.finalScore < 0.5).length,
passThreshold: scored.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT).length,
threshold: CONFIG.MIN_SIMILARITY_EVENT,
};
// Threshold, sort best-first, and cap the vector candidates
const candidates = scored
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
.sort((a, b) => b.finalScore - a.finalScore)
.slice(0, CONFIG.CANDIDATE_EVENTS);
const vectorRanked = candidates.map(s => ({
event: s.event,
similarity: s.finalScore,
rawSim: s.rawSim,
vector: s.vector,
}));
const eventById = new Map(allEvents.map(e => [e.id, e]));
const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);
const textOnlyStats = {
total: 0,
passedSoftCheck: 0,
filtered: 0,
finalIncluded: 0,
truncatedByLimit: 0,
};
// TEXT-only hits must still reach a soft minimum raw similarity
const filtered = fused.filter(x => {
if (x.type !== 'TEXT') return true;
textOnlyStats.total++;
const sim = x.rawSim || rawSimById.get(x.id) || 0;
if (sim >= CONFIG.TEXT_SOFT_MIN_SIM) {
textOnlyStats.passedSoftCheck++;
return true;
}
textOnlyStats.filtered++;
return false;
});
const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
...x,
_id: x.id,
}));
// MMR selection uses the fused RRF score as the relevance term
const mmrOutput = mmrSelect(
mmrInput,
CONFIG.MAX_EVENTS,
CONFIG.MMR_LAMBDA,
c => c.vector || null,
c => c.rrf
);
// Cap the number of TEXT-only hits after MMR
let textOnlyCount = 0;
const finalResults = mmrOutput.filter(x => {
if (x.type !== 'TEXT') return true;
if (textOnlyCount < CONFIG.TEXT_TOTAL_MAX) {
textOnlyCount++;
return true;
}
textOnlyStats.truncatedByLimit++;
return false;
});
textOnlyStats.finalIncluded = textOnlyCount;
const results = finalResults.map(x => ({
event: x.event,
similarity: x.rrf,
_recallType: hasPresentById.get(x.event?.id) ? 'DIRECT' : 'SIMILAR',
_recallReason: x.type,
_rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
_entityBonus: entityBonusById.get(x.event?.id) || 0,
_rawSim: rawSimById.get(x.event?.id) || 0,
}));
// Attach the statistics to the first result
if (results.length > 0) {
results[0]._preFilterDistribution = preFilterDistribution;
results[0]._rrfStats = {
vectorCount: vectorRanked.length,
textCount: textRanked.length,
hybridCount: fused.filter(x => x.type === 'HYBRID').length,
vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
textOnlyTotal: textOnlyStats.total,
};
results[0]._textOnlyStats = textOnlyStats;
results[0]._textGapInfo = textGapInfo;
}
return results;
}
// ═══════════════════════════════════════════════════════════════════════════
// 日志
// ═══════════════════════════════════════════════════════════════════════════
function formatRecallLog({
elapsed,
segments,
@@ -831,7 +865,7 @@ function formatRecallLog({
chunkResults,
eventResults,
allEvents,
normalizedEntityWeights = new Map(),
queryEntities = [],
causalEvents = [],
chunkPreFilterStats = null,
l0Results = [],
@@ -840,15 +874,15 @@ function formatRecallLog({
textQueryBreakdown = null,
}) {
const lines = [
'\u2554' + '\u2550'.repeat(62) + '\u2557',
'\u2551 记忆召回报告 \u2551',
'\u2560' + '\u2550'.repeat(62) + '\u2563',
`\u2551 耗时: ${elapsed}ms`,
'\u255a' + '\u2550'.repeat(62) + '\u255d',
'' + ''.repeat(62) + '',
' 记忆召回报告 ',
'' + ''.repeat(62) + '',
` 耗时: ${elapsed}ms`,
'' + ''.repeat(62) + '',
'',
'\u250c' + '\u2500'.repeat(61) + '\u2510',
'\u2502 【查询构建】最近 5 条消息,指数衰减加权 (β=0.7) \u2502',
'\u2514' + '\u2500'.repeat(61) + '\u2518',
'' + ''.repeat(61) + '',
` 【查询构建】最近 ${CONFIG.QUERY_MSG_COUNT} 条,内容感知加权 (β=${CONFIG.QUERY_DECAY_BETA}) │`,
'' + ''.repeat(61) + '',
];
const segmentsSorted = segments.map((s, i) => ({
@@ -858,25 +892,19 @@ function formatRecallLog({
})).sort((a, b) => b.weight - a.weight);
segmentsSorted.forEach((s, rank) => {
const bar = '\u2588'.repeat(Math.round(s.weight * 20));
const bar = ''.repeat(Math.round(s.weight * 20));
const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text;
const marker = rank === 0 ? ' ◀ 主导' : '';
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
});
lines.push('');
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
lines.push('\u2502 【提取实体】 \u2502');
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
lines.push('' + ''.repeat(61) + '');
lines.push(' 【提取实体】 ');
lines.push('' + ''.repeat(61) + '');
if (normalizedEntityWeights?.size) {
const sorted = Array.from(normalizedEntityWeights.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, 8);
const formatted = sorted
.map(([e, w]) => `${e}(${(w * 100).toFixed(0)}%)`)
.join(' | ');
lines.push(` ${formatted}`);
if (queryEntities?.length) {
lines.push(` 焦点: ${queryEntities.slice(0, 8).join('、')}${queryEntities.length > 8 ? ' ...' : ''}`);
} else {
lines.push(' (无)');
}
@@ -885,9 +913,9 @@ function formatRecallLog({
}
lines.push('');
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
lines.push('\u2502 【文本路 Query 构成】 \u2502');
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
lines.push('' + ''.repeat(61) + '');
lines.push(' 【文本路 Query 构成】 ');
lines.push('' + ''.repeat(61) + '');
if (textQueryBreakdown) {
const bd = textQueryBreakdown;
@@ -919,23 +947,9 @@ function formatRecallLog({
}
lines.push('');
lines.push(' 实体归一化(用于加分):');
if (normalizedEntityWeights?.size) {
const sorted = Array.from(normalizedEntityWeights.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, 8);
const formatted = sorted
.map(([e, w]) => `${e}(${(w * 100).toFixed(0)}%)`)
.join(' | ');
lines.push(` ${formatted}`);
} else {
lines.push(' (无)');
}
lines.push('');
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
lines.push('\u2502 【召回统计】 \u2502');
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
lines.push('┌' + '─'.repeat(61) + '┐');
lines.push('│ 【召回统计】 │');
lines.push('└' + '─'.repeat(61) + '┘');
// L0
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
@@ -953,6 +967,11 @@ function formatRecallLog({
const dist = chunkPreFilterStats.distribution || {};
lines.push(` 全量: ${chunkPreFilterStats.total} 条 | 通过阈值(远期≥${chunkPreFilterStats.thresholdRemote}, 待整理≥${chunkPreFilterStats.thresholdRecent}): ${chunkPreFilterStats.passThreshold} 条 | 最终: ${chunkResults.length}`);
lines.push(` 匹配度: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0}`);
const textL1 = chunkPreFilterStats.textL1;
if (textL1) {
lines.push(` 文本路补充(待整理区): 范围 ${textL1.range}楼 | 候选 ${textL1.candidates} 条 | 命中 ${textL1.hits}`);
}
} else {
lines.push(` 选入: ${chunkResults.length}`);
}
@@ -988,9 +1007,6 @@ function formatRecallLog({
lines.push(` ${i + 1}. [${id}] ${title.padEnd(25)} sim=${sim} tRank=${tRank}`);
});
}
const entityBoostedEvents = eventResults.filter(e => e._entityBonus > 0).length;
lines.push('');
lines.push(` 实体加分事件: ${entityBoostedEvents}`);
if (textGapInfo) {
lines.push('');
@@ -1002,7 +1018,6 @@ function formatRecallLog({
}
}
// Causal
if (causalEvents.length) {
const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0));
const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0));
@@ -1012,13 +1027,8 @@ function formatRecallLog({
}
lines.push('');
return lines.join('\n');
return lines.join("\n");
}
// ═══════════════════════════════════════════════════════════════════════════
// 主入口
// ═══════════════════════════════════════════════════════════════════════════
export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) {
const T0 = performance.now();
const { chat } = getContext();
@@ -1049,9 +1059,9 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
const lexicon = buildEntityLexicon(store, allEvents);
const queryEntityWeights = extractEntitiesWithWeights(segments, weights, lexicon);
const queryEntities = Array.from(queryEntityWeights.keys());
const queryEntitySet = new Set(queryEntities.map(normalize));
const facts = getFacts(store);
const expandedTerms = expandByFacts(queryEntities, facts, 2);
const normalizedEntityWeights = normalizeEntityWeights(queryEntityWeights);
let queryTextForSearch = '';
let textQueryBreakdown = null;
@@ -1079,8 +1089,11 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
}
const [chunkResults, eventResults] = await Promise.all([
searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor),
searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, normalizedEntityWeights, l0FloorBonus),
searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor, {
queryEntities,
rareTerms: textQueryBreakdown?.rareTerms || [],
}),
searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntitySet, l0FloorBonus),
]);
const chunkPreFilterStats = chunkResults._preFilterStats || null;
@@ -1118,7 +1131,7 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
chunkResults: mergedChunks,
eventResults,
allEvents,
normalizedEntityWeights,
queryEntities,
causalEvents: causalEventsTruncated,
chunkPreFilterStats,
l0Results,
@@ -1149,3 +1162,8 @@ export function buildQueryText(chat, count = 2, excludeLastAi = false) {
return `${speaker}: ${text.slice(0, 500)}`;
}).filter(Boolean).join('\n');
}

View File

@@ -1,6 +1,6 @@
// text-search.js - 最终版
import MiniSearch from '../../../libs/minisearch.mjs';
import MiniSearch from '../../../../libs/minisearch.mjs';
const STOP_WORDS = new Set([
'的', '了', '是', '在', '和', '与', '或', '但', '而', '却',
@@ -106,7 +106,7 @@ export function ensureEventTextIndex(events, revision) {
*
* 参考帕累托法则80/20 法则在信息检索中的应用
*/
function dynamicTopK(scores, coverage = 0.90, minK = 15, maxK = 80) {
export function dynamicTopK(scores, coverage = 0.90, minK = 15, maxK = 80) {
if (!scores.length) return 0;
const total = scores.reduce((a, b) => a + b, 0);
@@ -171,3 +171,67 @@ export function clearEventTextIndex() {
idx = null;
lastRevision = null;
}
// ---------------------------------------------------------------------------
// Chunk text index (supplementary L1 text recall for the pending,
// not-yet-summarized floor range)
// ---------------------------------------------------------------------------
// Module-level cache: the current MiniSearch index over chunks (null when
// none has been built or the last build failed) and the revision string it
// was built for.
let chunkIdx = null;
let chunkIdxRevision = null;
/**
 * Makes sure the module-level chunk text index matches `revision`.
 *
 * Does nothing when an index already exists for the same revision. Otherwise
 * a fresh MiniSearch index over each chunk's `text` is built (storing
 * `chunkId` and `floor`) and recorded together with the revision. If building
 * fails, the error is logged and the index is reset to `null`.
 *
 * @param {Array<{chunkId: string, floor: number, text?: string}>} chunks -
 *   Chunks to index.
 * @param {string} revision - Cache key identifying this chunk set.
 * @returns {void}
 */
export function ensureChunkTextIndex(chunks, revision) {
    const upToDate = chunkIdx && chunkIdxRevision === revision;
    if (upToDate) return;

    try {
        const index = new MiniSearch({
            fields: ['text'],
            storeFields: ['chunkId', 'floor'],
            tokenize,
            searchOptions: { tokenize },
        });
        const documents = chunks.map(({ chunkId, floor, text }) => ({
            id: chunkId,
            chunkId,
            floor,
            text: text || '',
        }));
        index.addAll(documents);

        chunkIdx = index;
        chunkIdxRevision = revision;
    } catch (e) {
        console.error('[text-search] Chunk index build failed:', e);
        chunkIdx = null;
    }
}
/**
 * Searches the chunk text index and returns the top hits inside a floor range.
 *
 * Matching is exact-term only (no fuzzy, no prefix). Hits outside
 * `[floorMin, floorMax]` are discarded, then the number kept is chosen by
 * `dynamicTopK(scores, 0.85, 5, limit)`.
 *
 * @param {string} query - Search terms.
 * @param {number} floorMin - Lowest floor to accept (inclusive).
 * @param {number} floorMax - Highest floor to accept (inclusive).
 * @param {number} [limit=20] - Passed to `dynamicTopK` as its upper bound.
 * @returns {Array<{chunkId: string, floor: number, textRank: number, score: number}>}
 *   Hits in search order with a 1-based `textRank`; `[]` when there is no
 *   index, the query is blank, nothing matches, or the search throws.
 */
export function searchChunksByText(query, floorMin, floorMax, limit = 20) {
    if (!chunkIdx || !query?.trim()) return [];

    try {
        const inRange = [];
        for (const hit of chunkIdx.search(query, { fuzzy: false, prefix: false })) {
            if (hit.floor >= floorMin && hit.floor <= floorMax) {
                inRange.push(hit);
            }
        }
        if (inRange.length === 0) return [];

        const keep = dynamicTopK(inRange.map(hit => hit.score), 0.85, 5, limit);

        return inRange.slice(0, keep).map(({ chunkId, floor, score }, position) => ({
            chunkId,
            floor,
            textRank: position + 1,
            score,
        }));
    } catch (e) {
        console.error('[text-search] Chunk search failed:', e);
        return [];
    }
}
/**
 * Discards the cached chunk text index and its revision, so the next
 * `ensureChunkTextIndex` call rebuilds it.
 *
 * @returns {void}
 */
export function clearChunkTextIndex() {
    chunkIdxRevision = null;
    chunkIdx = null;
}

View File

@@ -1,5 +1,5 @@
import { xbLog } from '../../../core/debug-core.js';
import { extensionFolderPath } from '../../../core/constants.js';
import { xbLog } from '../../../../core/debug-core.js';
import { extensionFolderPath } from '../../../../core/constants.js';
const MODULE_ID = 'tokenizer';

View File

@@ -8,7 +8,7 @@ import {
chunkVectorsTable,
eventVectorsTable,
CHUNK_MAX_TOKENS,
} from '../data/db.js';
} from '../../data/db.js';
// ═══════════════════════════════════════════════════════════════════════════
// 工具函数

View File

@@ -4,11 +4,11 @@
// StateVector 存 IndexedDB可重建
// ═══════════════════════════════════════════════════════════════════════════
import { saveMetadataDebounced } from '../../../../../../extensions.js';
import { chat_metadata } from '../../../../../../../script.js';
import { stateVectorsTable } from '../data/db.js';
import { EXT_ID } from '../../../core/constants.js';
import { xbLog } from '../../../core/debug-core.js';
import { saveMetadataDebounced } from '../../../../../../../extensions.js';
import { chat_metadata } from '../../../../../../../../script.js';
import { stateVectorsTable } from '../../data/db.js';
import { EXT_ID } from '../../../../core/constants.js';
import { xbLog } from '../../../../core/debug-core.js';
const MODULE_ID = 'state-store';

View File

@@ -3,9 +3,9 @@
// 向量数据导入导出(当前 chatId 级别)
// ═══════════════════════════════════════════════════════════════════════════
import { zipSync, unzipSync, strToU8, strFromU8 } from '../../../libs/fflate.mjs';
import { getContext } from '../../../../../../extensions.js';
import { xbLog } from '../../../core/debug-core.js';
import { zipSync, unzipSync, strToU8, strFromU8 } from '../../../../libs/fflate.mjs';
import { getContext } from '../../../../../../../extensions.js';
import { xbLog } from '../../../../core/debug-core.js';
import {
getMeta,
updateMeta,
@@ -26,8 +26,8 @@ import {
saveStateVectors,
clearStateVectors,
} from './state-store.js';
import { getEngineFingerprint } from './embedder.js';
import { getVectorConfig } from '../data/config.js';
import { getEngineFingerprint } from '../utils/embedder.js';
import { getVectorConfig } from '../../data/config.js';
const MODULE_ID = 'vector-io';
const EXPORT_VERSION = 1;

View File

@@ -3,7 +3,7 @@
// 统一的向量生成接口(本地模型 / 在线服务)
// ═══════════════════════════════════════════════════════════════════════════
import { xbLog } from '../../../core/debug-core.js';
import { xbLog } from '../../../../core/debug-core.js';
const MODULE_ID = 'embedding';

View File

@@ -3,7 +3,7 @@
// 跳过用户定义的「起始→结束」区间
// ═══════════════════════════════════════════════════════════════════════════
import { getTextFilterRules } from '../data/config.js';
import { getTextFilterRules } from '../../data/config.js';
/**
* 转义正则特殊字符