Compare commits

...

2 Commits

Author SHA1 Message Date
6171195350 Adjust summary prompt labels and budgets 2026-02-05 13:46:28 +08:00
8137e206f9 Update story summary recall and prompt 2026-02-05 00:22:02 +08:00
18 changed files with 711 additions and 408 deletions

View File

@@ -5,10 +5,11 @@ import { getContext, saveMetadataDebounced } from "../../../../../../extensions.
import { chat_metadata } from "../../../../../../../script.js"; import { chat_metadata } from "../../../../../../../script.js";
import { EXT_ID } from "../../../core/constants.js"; import { EXT_ID } from "../../../core/constants.js";
import { xbLog } from "../../../core/debug-core.js"; import { xbLog } from "../../../core/debug-core.js";
import { clearEventVectors, deleteEventVectorsByIds } from "../vector/chunk-store.js"; import { clearEventVectors, deleteEventVectorsByIds } from "../vector/storage/chunk-store.js";
import { clearEventTextIndex } from '../vector/text-search.js'; import { clearEventTextIndex } from '../vector/retrieval/text-search.js';
const MODULE_ID = 'summaryStore'; const MODULE_ID = 'summaryStore';
const FACTS_LIMIT_PER_SUBJECT = 10;
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// 基础存取 // 基础存取
@@ -125,32 +126,26 @@ function getNextFactId(existingFacts) {
export function mergeFacts(existingFacts, updates, floor) { export function mergeFacts(existingFacts, updates, floor) {
const map = new Map(); const map = new Map();
// 加载现有 facts
for (const f of existingFacts || []) { for (const f of existingFacts || []) {
if (!f.retracted) { if (!f.retracted) {
map.set(factKey(f), f); map.set(factKey(f), f);
} }
} }
// 获取下一个 ID
let nextId = getNextFactId(existingFacts); let nextId = getNextFactId(existingFacts);
// 应用更新
for (const u of updates || []) { for (const u of updates || []) {
if (!u.s || !u.p) continue; if (!u.s || !u.p) continue;
const key = factKey(u); const key = factKey(u);
// 删除操作
if (u.retracted === true) { if (u.retracted === true) {
map.delete(key); map.delete(key);
continue; continue;
} }
// 无 o 则跳过
if (!u.o || !String(u.o).trim()) continue; if (!u.o || !String(u.o).trim()) continue;
// 覆盖或新增
const existing = map.get(key); const existing = map.get(key);
const newFact = { const newFact = {
id: existing?.id || `f-${nextId++}`, id: existing?.id || `f-${nextId++}`,
@@ -158,14 +153,13 @@ export function mergeFacts(existingFacts, updates, floor) {
p: u.p.trim(), p: u.p.trim(),
o: String(u.o).trim(), o: String(u.o).trim(),
since: floor, since: floor,
_isState: existing?._isState ?? !!u.isState,
}; };
// 关系类保留 trend
if (isRelationFact(newFact) && u.trend) { if (isRelationFact(newFact) && u.trend) {
newFact.trend = u.trend; newFact.trend = u.trend;
} }
// 保留原始 _addedAt如果是更新
if (existing?._addedAt != null) { if (existing?._addedAt != null) {
newFact._addedAt = existing._addedAt; newFact._addedAt = existing._addedAt;
} else { } else {
@@ -175,9 +169,28 @@ export function mergeFacts(existingFacts, updates, floor) {
map.set(key, newFact); map.set(key, newFact);
} }
return Array.from(map.values()); const factsBySubject = new Map();
for (const f of map.values()) {
if (f._isState) continue;
const arr = factsBySubject.get(f.s) || [];
arr.push(f);
factsBySubject.set(f.s, arr);
}
const toRemove = new Set();
for (const arr of factsBySubject.values()) {
if (arr.length > FACTS_LIMIT_PER_SUBJECT) {
arr.sort((a, b) => (a._addedAt || 0) - (b._addedAt || 0));
for (let i = 0; i < arr.length - FACTS_LIMIT_PER_SUBJECT; i++) {
toRemove.add(factKey(arr[i]));
}
}
}
return Array.from(map.values()).filter(f => !toRemove.has(factKey(f)));
} }
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// 旧数据迁移 // 旧数据迁移
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════

View File

@@ -32,7 +32,6 @@ function sanitizeFacts(parsed) {
if (!s || !pRaw) continue; if (!s || !pRaw) continue;
// 删除操作
if (item.retracted === true) { if (item.retracted === true) {
ok.push({ s, p: pRaw, retracted: true }); ok.push({ s, p: pRaw, retracted: true });
continue; continue;
@@ -43,11 +42,15 @@ function sanitizeFacts(parsed) {
const relP = normalizeRelationPredicate(pRaw); const relP = normalizeRelationPredicate(pRaw);
const isRel = !!relP; const isRel = !!relP;
const fact = { s, p: isRel ? relP : pRaw, o }; const fact = {
s,
p: isRel ? relP : pRaw,
o,
isState: !!item.isState,
};
// 关系类保留 trend
if (isRel && item.trend) { if (isRel && item.trend) {
const validTrends = ['破裂', '厌恶', '反感', '陌生', '投缘', '亲密', '交融']; const validTrends = ['破裂', '厌恶', '反感', '陌生', '投缘', '亲密', '交融'];
if (validTrends.includes(item.trend)) { if (validTrends.includes(item.trend)) {
fact.trend = item.trend; fact.trend = item.trend;
} }
@@ -59,6 +62,7 @@ function sanitizeFacts(parsed) {
parsed.factUpdates = ok; parsed.factUpdates = ok;
} }
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// causedBy 清洗(事件因果边) // causedBy 清洗(事件因果边)
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════

View File

@@ -100,14 +100,19 @@ Acknowledged. Now reviewing the incremental summarization specifications:
├─ progress: 0.0 to 1.0 ├─ progress: 0.0 to 1.0
└─ newMoment: 仅记录本次新增的关键时刻 └─ newMoment: 仅记录本次新增的关键时刻
[Fact Tracking - SPO Triples] [Fact Tracking - SPO ???]
├─ s: 主体(角色名/物品名) ?? ??: ?? & ???????
├─ p: 谓词(属性名) ?? ??: ??????????????????
│ - 关系类只允许对X的看法 / 与X的关系 ?? SPO ??:
├─ o: 值(当前状态) ? s: ??????/????
├─ trend: 仅关系类填写 ? p: ??????????????
├─ retracted: 删除标记 ? o: ???
└─ s+p 为键,相同键会覆盖旧值 ?? KV ??: s+p ??????????
?? isState ????????:
? true = ????????????/??/??/???
? false = ??????????????
?? trend: ?????????/??/??/??/??/??/???
?? retracted: true ???????
Ready to process incremental summary requests with strict deduplication.`, Ready to process incremental summary requests with strict deduplication.`,
@@ -177,26 +182,28 @@ Before generating, observe the USER and analyze carefully:
"arcUpdates": [ "arcUpdates": [
{"name": "角色名", "trajectory": "当前阶段描述(15字内)", "progress": 0.0-1.0, "newMoment": "本次新增的关键时刻"} {"name": "角色名", "trajectory": "当前阶段描述(15字内)", "progress": 0.0-1.0, "newMoment": "本次新增的关键时刻"}
], ],
"factUpdates": [ "factUpdates": [
{ {
"s": "主体(角色名/物品名)", "s": "主体",
"p": "谓词(属性名/对X的看法", "p": "谓词(复用已有谓词,避免同义词",
"o": "当前值", "o": "当前值",
"trend": "破裂|厌恶|反感|陌生|投缘|亲密|交融", "isState": true/false,
"retracted": false "trend": "仅关系类:破裂|厌恶|反感|陌生|投缘|亲密|交融"
} }
] ]
} }
\`\`\` \`\`\`
## factUpdates 规则 ## factUpdates 规则
- 目的: 纠错 & 世界一致性约束,只记录硬性事实
- s+p 为键,相同键会覆盖旧值 - s+p 为键,相同键会覆盖旧值
- 状态类s=角色名, p=属性(生死/位置/状态等), o=值 - isState: true=核心约束(位置/身份/生死/关系)false=有容量上限会被清理
- 关系类s=角色A, p="对B的看法" 或 p="与B的关系"trend 仅限关系类 - 关系类: p="对X的看法"trend 必填
- 删除设置 retracted: true(不需要填 o - 删除: 设置 retracted: true
- 只输出有变化的条目 - 谓词规范化: 复用已有谓词,不要发明同义词
- 硬约束才记录,避免叙事化,确保少、硬、稳定 - 只输出有变化的条目,确保少、硬、稳定
## CRITICAL NOTES ## CRITICAL NOTES
- events.id 从 evt-{nextEventId} 开始编号 - events.id 从 evt-{nextEventId} 开始编号
- 仅输出【增量】内容,已有事件绝不重复 - 仅输出【增量】内容,已有事件绝不重复
@@ -267,9 +274,11 @@ function waitForStreamingComplete(sessionId, streamingMod, timeout = 120000) {
function formatFactsForLLM(facts) { function formatFactsForLLM(facts) {
if (!facts?.length) { if (!facts?.length) {
return '(空白,尚无事实记录)'; return { text: '(空白,尚无事实记录)', predicates: [] };
} }
const predicates = [...new Set(facts.map(f => f.p).filter(Boolean))];
const lines = facts.map(f => { const lines = facts.map(f => {
if (f.trend) { if (f.trend) {
return `- ${f.s} | ${f.p} | ${f.o} [${f.trend}]`; return `- ${f.s} | ${f.p} | ${f.o} [${f.trend}]`;
@@ -277,11 +286,18 @@ function formatFactsForLLM(facts) {
return `- ${f.s} | ${f.p} | ${f.o}`; return `- ${f.s} | ${f.p} | ${f.o}`;
}); });
return lines.join('\n') || '(空白,尚无事实记录)'; return {
text: lines.join('\n') || '(空白,尚无事实记录)',
predicates,
};
} }
function buildSummaryMessages(existingSummary, existingFacts, newHistoryText, historyRange, nextEventId, existingEventCount) { function buildSummaryMessages(existingSummary, existingFacts, newHistoryText, historyRange, nextEventId, existingEventCount) {
const factsText = formatFactsForLLM(existingFacts); const { text: factsText, predicates } = formatFactsForLLM(existingFacts);
const predicatesHint = predicates.length > 0
? `\n\n<\u5df2\u6709\u8c13\u8bcd\uff0c\u8bf7\u590d\u7528>\n${predicates.join('\u3001')}\n</\u5df2\u6709\u8c13\u8bcd\uff0c\u8bf7\u590d\u7528>`
: '';
const jsonFormat = LLM_PROMPT_CONFIG.userJsonFormat const jsonFormat = LLM_PROMPT_CONFIG.userJsonFormat
.replace(/\{nextEventId\}/g, String(nextEventId)); .replace(/\{nextEventId\}/g, String(nextEventId));
@@ -293,9 +309,9 @@ function buildSummaryMessages(existingSummary, existingFacts, newHistoryText, hi
{ role: 'system', content: LLM_PROMPT_CONFIG.topSystem }, { role: 'system', content: LLM_PROMPT_CONFIG.topSystem },
{ role: 'assistant', content: LLM_PROMPT_CONFIG.assistantDoc }, { role: 'assistant', content: LLM_PROMPT_CONFIG.assistantDoc },
{ role: 'assistant', content: LLM_PROMPT_CONFIG.assistantAskSummary }, { role: 'assistant', content: LLM_PROMPT_CONFIG.assistantAskSummary },
{ role: 'user', content: `<已有总结状态>\n${existingSummary}\n</已有总结状态>\n\n<当前事实图谱>\n${factsText}\n</当前事实图谱>` }, { role: 'user', content: `<\u5df2\u6709\u603b\u7ed3\u72b6\u6001>\n${existingSummary}\n</\u5df2\u6709\u603b\u7ed3\u72b6\u6001>\n\n<\u5f53\u524d\u4e8b\u5b9e\u56fe\u8c31>\n${factsText}\n</\u5f53\u524d\u4e8b\u5b9e\u56fe\u8c31>${predicatesHint}` },
{ role: 'assistant', content: LLM_PROMPT_CONFIG.assistantAskContent }, { role: 'assistant', content: LLM_PROMPT_CONFIG.assistantAskContent },
{ role: 'user', content: `<新对话内容>${historyRange}\n${newHistoryText}\n</新对话内容>` } { role: 'user', content: `<\u65b0\u5bf9\u8bdd\u5185\u5bb9>\uff08${historyRange}\uff09\n${newHistoryText}\n</\u65b0\u5bf9\u8bdd\u5185\u5bb9>` }
]; ];
const bottomMessages = [ const bottomMessages = [
@@ -311,6 +327,7 @@ function buildSummaryMessages(existingSummary, existingFacts, newHistoryText, hi
}; };
} }
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// JSON 解析 // JSON 解析
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
@@ -415,4 +432,4 @@ export async function generateSummary(options) {
console.groupEnd(); console.groupEnd();
return rawOutput; return rawOutput;
} }

View File

@@ -1,6 +1,6 @@
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Prompt Injection (Final Clean Version) // Story Summary - Prompt Injection (Final Clean Version)
// - 仅负责构建注入文本,不负责写入 extension_prompts // - 仅负责"构建注入文本",不负责写入 extension_prompts
// - 注入发生在 story-summary.js:GENERATION_STARTED 时写入 extension_prompts(IN_CHAT + depth) // - 注入发生在 story-summary.js:GENERATION_STARTED 时写入 extension_prompts(IN_CHAT + depth)
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
@@ -8,8 +8,8 @@ import { getContext } from "../../../../../../extensions.js";
import { xbLog } from "../../../core/debug-core.js"; import { xbLog } from "../../../core/debug-core.js";
import { getSummaryStore, getFacts, isRelationFact } from "../data/store.js"; import { getSummaryStore, getFacts, isRelationFact } from "../data/store.js";
import { getVectorConfig, getSummaryPanelConfig, getSettings } from "../data/config.js"; import { getVectorConfig, getSummaryPanelConfig, getSettings } from "../data/config.js";
import { recallMemory, buildQueryText } from "../vector/recall.js"; import { recallMemory, buildQueryText } from "../vector/retrieval/recall.js";
import { getChunksByFloors, getAllChunkVectors, getAllEventVectors, getMeta } from "../vector/chunk-store.js"; import { getChunksByFloors, getAllChunkVectors, getAllEventVectors, getMeta } from "../vector/storage/chunk-store.js";
const MODULE_ID = "summaryPrompt"; const MODULE_ID = "summaryPrompt";
@@ -32,6 +32,7 @@ function canNotifyRecallFail() {
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
const MAIN_BUDGET_MAX = 10000; // 主装配预算(世界/事件/远期/弧光) const MAIN_BUDGET_MAX = 10000; // 主装配预算(世界/事件/远期/弧光)
const ORPHAN_MAX = 2500; // 远期上限
const RECENT_ORPHAN_MAX = 5000; // [待整理] 独立预算 const RECENT_ORPHAN_MAX = 5000; // [待整理] 独立预算
const TOTAL_BUDGET_MAX = 15000; // 总预算(用于日志显示) const TOTAL_BUDGET_MAX = 15000; // 总预算(用于日志显示)
const L3_MAX = 2000; const L3_MAX = 2000;
@@ -85,6 +86,49 @@ function cleanSummary(summary) {
.trim(); .trim();
} }
// ─────────────────────────────────────────────────────────────────────────────
// 上下文配对工具函数
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Resolve the floor that pairs with a chunk for context display.
 *   - user chunk     -> the next floor (the AI reply)
 *   - non-user chunk -> the previous floor (the user's message)
 *
 * @param {object} chunk - Chunk carrying `isL0`, `isUser` and `floor`.
 * @returns {number} The paired floor, or -1 for an L0 chunk.
 */
function getContextFloor(chunk) {
    // L0 virtual chunks are never paired.
    if (chunk.isL0) {
        return -1;
    }

    if (chunk.isUser) {
        return chunk.floor + 1;
    }
    return chunk.floor - 1;
}
/**
 * Choose the context chunk to show alongside a main chunk.
 * The first candidate spoken by the opposite role wins; if none exists,
 * the first candidate is used.
 *
 * @param {Array<object>|null|undefined} candidates - Chunks found on the paired floor.
 * @param {object} mainChunk - The chunk being paired; only `isUser` is read.
 * @returns {object|null} The selected candidate, or null when there are no candidates.
 */
function pickContextChunk(candidates, mainChunk) {
    const hasCandidates = Boolean(candidates?.length);
    if (!hasCandidates) {
        return null;
    }

    // Look for a speaker on the other side of the exchange first.
    const wantUser = !mainChunk.isUser;
    const oppositeRole = candidates.find((candidate) => candidate.isUser === wantUser);

    // No opposite-role candidate: fall back to whatever comes first.
    return oppositeRole || candidates[0];
}
/**
 * Render a paired context chunk as one line, prefixed with a direction glyph.
 *
 * @param {object} chunk - Context chunk; reads `isUser`, `speaker`, `text` and `floor`.
 * @param {boolean} isAbove - Truthy selects the "┌" glyph, falsy selects "└".
 * @returns {string} The formatted line, showing the floor as `floor + 1`.
 */
function formatContextChunkLine(chunk, isAbove) {
    const { name1: userName, name2: characterName } = getContext();

    // User chunks use the context's name1; others prefer the chunk's own speaker, then name2.
    let who;
    if (chunk.isUser) {
        who = userName || "用户";
    } else {
        who = chunk.speaker || characterName || "角色";
    }

    const body = String(chunk.text || "").trim();
    const glyph = isAbove ? "┌" : "└";

    return ` ${glyph} #${chunk.floor + 1} [${who}] ${body}`;
}
/**
* 格式化配对chunk缩进简短摘要
*/
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// 系统前导与后缀 // 系统前导与后缀
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@@ -131,7 +175,7 @@ function formatArcLine(a) {
.filter(Boolean); .filter(Boolean);
if (moments.length) { if (moments.length) {
return `- ${a.name}${moments.join(" → ")}(当前:${a.trajectory}`; return `- ${a.name}${moments.join(" → ")}`;
} }
return `- ${a.name}${a.trajectory}`; return `- ${a.name}${a.trajectory}`;
} }
@@ -150,7 +194,31 @@ function formatChunkFullLine(c) {
return ` #${c.floor + 1} [${speaker}] ${String(c.text || "").trim()}`; return ` #${c.floor + 1} [${speaker}] ${String(c.text || "").trim()}`;
} }
// 因果事件格式(仅作为“前因线索”展示,仍保留楼层提示) /**
* 格式化chunk及其配对上下文
* 返回数组:[配对行(如果在前), 主chunk行, 配对行(如果在后)]
*/
function formatChunkWithContext(mainChunk, contextChunk) {
    // The main line is always rendered, and always before the context line is formatted.
    const mainLine = formatChunkFullLine(mainChunk);

    // No partner chunk: the main line stands alone.
    if (!contextChunk) {
        return [mainLine];
    }

    // A context chunk from a lower floor goes above the main line; otherwise below it.
    const contextComesFirst = contextChunk.floor < mainChunk.floor;
    if (contextComesFirst) {
        return [formatContextChunkLine(contextChunk, true), mainLine];
    }
    return [mainLine, formatContextChunkLine(contextChunk, false)];
}
// 因果事件格式(仅作为"前因线索"展示,仍保留楼层提示)
function formatCausalEventLine(causalItem, causalById) { function formatCausalEventLine(causalItem, causalById) {
const ev = causalItem?.event || {}; const ev = causalItem?.event || {};
const depth = Math.max(1, Math.min(9, causalItem?._causalDepth || 1)); const depth = Math.max(1, Math.min(9, causalItem?._causalDepth || 1));
@@ -172,9 +240,8 @@ function formatCausalEventLine(causalItem, causalById) {
const evidence = causalItem._evidenceChunk; const evidence = causalItem._evidenceChunk;
if (evidence) { if (evidence) {
const speaker = evidence.speaker || "角色"; const speaker = evidence.speaker || "角色";
const preview = String(evidence.text || ""); const text = String(evidence.text || "").trim();
const clip = preview.length > 60 ? preview.slice(0, 60) + "..." : preview; lines.push(`${indent} #${evidence.floor + 1} [${speaker}] ${text}`);
lines.push(`${indent} #${evidence.floor + 1} [${speaker}] ${clip}`);
} }
return lines.join("\n"); return lines.join("\n");
@@ -216,11 +283,13 @@ function formatInjectionLog(stats, details, recentOrphanStats = null) {
const l1OrphanCount = (stats.orphans.injected || 0) - l0OrphanCount; const l1OrphanCount = (stats.orphans.injected || 0) - l0OrphanCount;
lines.push(` [3] 远期片段 (已总结范围)`); lines.push(` [3] 远期片段 (已总结范围)`);
lines.push(` 选入: ${stats.orphans.injected} 条 (L0: ${l0OrphanCount}, L1: ${l1OrphanCount}) | 消耗: ${stats.orphans.tokens} tokens`); lines.push(` 选入: ${stats.orphans.injected} 条 (L0: ${l0OrphanCount}, L1: ${l1OrphanCount}) | 消耗: ${stats.orphans.tokens} tokens`);
lines.push(` 配对: ${stats.orphans.contextPairs || 0}`);
lines.push(''); lines.push('');
// [4] 待整理 // [4] 待整理
lines.push(` [4] 待整理 (独立预算 5000)`); lines.push(` [4] 待整理 (独立预算 5000)`);
lines.push(` 选入: ${recentOrphanStats?.injected || 0} 条 | 消耗: ${recentOrphanStats?.tokens || 0} tokens`); lines.push(` 选入: ${recentOrphanStats?.injected || 0} 条 | 消耗: ${recentOrphanStats?.tokens || 0} tokens`);
lines.push(` 配对: ${recentOrphanStats?.contextPairs || 0}`);
lines.push(` 楼层: ${recentOrphanStats?.floorRange || 'N/A'}`); lines.push(` 楼层: ${recentOrphanStats?.floorRange || 'N/A'}`);
lines.push(''); lines.push('');
@@ -248,7 +317,7 @@ function formatInjectionLog(stats, details, recentOrphanStats = null) {
return lines.join('\n'); return lines.join('\n');
} }
// 重写事件文本里的序号前缀:把 {idx}. {idx}.【...】 的 idx 替换 // 重写事件文本里的序号前缀:把 "{idx}. ""{idx}.【...】" 的 idx 替换
function renumberEventText(text, newIndex) { function renumberEventText(text, newIndex) {
const s = String(text || ""); const s = String(text || "");
// 匹配行首: "12." 或 "12.【" // 匹配行首: "12." 或 "12.【"
@@ -325,11 +394,12 @@ export function buildNonVectorPromptText() {
return text; return text;
} }
// ───────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// 向量模式:预算装配(世界 → 事件(带证据) → 碎片 → 弧光) // 向量模式:预算装配(世界 → 事件(带证据) → 碎片 → 弧光)
// ───────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
async function buildVectorPrompt(store, recallResult, causalById, queryEntities = [], meta = null) { async function buildVectorPrompt(store, recallResult, causalById, queryEntities = [], meta = null) {
const { chatId } = getContext();
const data = store.json || {}; const data = store.json || {};
const total = { used: 0, max: MAIN_BUDGET_MAX }; const total = { used: 0, max: MAIN_BUDGET_MAX };
@@ -351,13 +421,14 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
arcs: { count: 0, tokens: 0 }, arcs: { count: 0, tokens: 0 },
events: { selected: 0, tokens: 0 }, events: { selected: 0, tokens: 0 },
evidence: { attached: 0, tokens: 0 }, evidence: { attached: 0, tokens: 0 },
orphans: { injected: 0, tokens: 0 }, orphans: { injected: 0, tokens: 0, l0Count: 0, contextPairs: 0 },
}; };
const recentOrphanStats = { const recentOrphanStats = {
injected: 0, injected: 0,
tokens: 0, tokens: 0,
floorRange: "N/A", floorRange: "N/A",
contextPairs: 0,
}; };
const details = { const details = {
eventList: [], eventList: [],
@@ -473,14 +544,14 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
const bestChunk = pickBestChunkForEvent(e.event); const bestChunk = pickBestChunkForEvent(e.event);
// 先尝试带证据 // 先尝试"带证据"
// idx 先占位写 0后面统一按时间线重排后再改号 // idx 先占位写 0后面统一按时间线重排后再改号
let text = formatEventWithEvidence(e, 0, bestChunk); let text = formatEventWithEvidence(e, 0, bestChunk);
let cost = estimateTokens(text); let cost = estimateTokens(text);
let hasEvidence = !!bestChunk; let hasEvidence = !!bestChunk;
let chosenChunk = bestChunk || null; let chosenChunk = bestChunk || null;
// 塞不下就退化成不带证据 // 塞不下就退化成"不带证据"
if (total.used + cost > total.max) { if (total.used + cost > total.max) {
text = formatEventWithEvidence(e, 0, null); text = formatEventWithEvidence(e, 0, null);
cost = estimateTokens(text); cost = estimateTokens(text);
@@ -549,33 +620,90 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
assembled.events.similar = selectedSimilarTexts; assembled.events.similar = selectedSimilarTexts;
// ═══════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════
// [优先级 4] 远期片段(已总结范围的 orphan chunks // [优先级 4] 远期片段(已总结范围的 orphan chunks- 带上下文配对
// ═══════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════
const lastSummarized = store.lastSummarizedMesId ?? -1; const lastSummarized = store.lastSummarizedMesId ?? -1;
const lastChunkFloor = meta?.lastChunkFloor ?? -1; const lastChunkFloor = meta?.lastChunkFloor ?? -1;
const keepVisible = store.keepVisibleCount ?? 3; const keepVisible = store.keepVisibleCount ?? 3;
if (chunks.length && total.used < total.max) { // 收集需要配对的楼层
const orphans = chunks const orphanContextFloors = new Set();
.filter(c => !usedChunkIds.has(c.chunkId)) const orphanCandidates = chunks
.filter(c => c.floor <= lastSummarized) .filter(c => !usedChunkIds.has(c.chunkId))
.filter(c => c.floor <= lastSummarized);
for (const c of orphanCandidates) {
if (c.isL0) continue;
const pairFloor = getContextFloor(c);
if (pairFloor >= 0) orphanContextFloors.add(pairFloor);
}
// 批量获取配对楼层的chunks
let contextChunksByFloor = new Map();
if (chatId && orphanContextFloors.size > 0) {
try {
const contextChunks = await getChunksByFloors(chatId, Array.from(orphanContextFloors));
for (const pc of contextChunks) {
if (!contextChunksByFloor.has(pc.floor)) {
contextChunksByFloor.set(pc.floor, []);
}
contextChunksByFloor.get(pc.floor).push(pc);
}
} catch (e) {
xbLog.warn(MODULE_ID, "获取配对chunks失败", e);
}
}
if (orphanCandidates.length && total.used < total.max) {
const orphans = orphanCandidates
.sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0))); .sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0)));
const l1Budget = { used: 0, max: total.max - total.used }; const l1Budget = { used: 0, max: Math.min(ORPHAN_MAX, total.max - total.used) };
let l0Count = 0;
let contextPairsCount = 0;
for (const c of orphans) { for (const c of orphans) {
const line = formatChunkFullLine(c); // L0 不需要配对
if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) break; if (c.isL0) {
const line = formatChunkFullLine(c);
if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) break;
injectionStats.orphans.injected++;
l0Count++;
continue;
}
// 获取配对chunk
const pairFloor = getContextFloor(c);
const candidates = contextChunksByFloor.get(pairFloor) || [];
const contextChunk = pickContextChunk(candidates, c);
// 格式化(带配对)
const formattedLines = formatChunkWithContext(c, contextChunk);
// 尝试添加所有行
let allAdded = true;
for (const line of formattedLines) {
if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) {
allAdded = false;
break;
}
}
if (!allAdded) break;
injectionStats.orphans.injected++; injectionStats.orphans.injected++;
if (contextChunk) contextPairsCount++;
} }
assembled.orphans.tokens = l1Budget.used; assembled.orphans.tokens = l1Budget.used;
total.used += l1Budget.used; total.used += l1Budget.used;
injectionStats.orphans.tokens = l1Budget.used; injectionStats.orphans.tokens = l1Budget.used;
injectionStats.orphans.l0Count = l0Count;
injectionStats.orphans.contextPairs = contextPairsCount;
} }
// ═══════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════
// [独立预算] 待整理(未总结范围,独立 5000 // [独立预算] 待整理(未总结范围,独立 5000- 带上下文配对
// ═══════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════
// 近期范围:(lastSummarized, lastChunkFloor - keepVisible] // 近期范围:(lastSummarized, lastChunkFloor - keepVisible]
@@ -583,55 +711,113 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
const recentEnd = lastChunkFloor - keepVisible; const recentEnd = lastChunkFloor - keepVisible;
if (chunks.length && recentEnd >= recentStart) { if (chunks.length && recentEnd >= recentStart) {
const recentOrphans = chunks const recentOrphanCandidates = chunks
.filter(c => !usedChunkIds.has(c.chunkId)) .filter(c => !usedChunkIds.has(c.chunkId))
.filter(c => c.floor >= recentStart && c.floor <= recentEnd) .filter(c => c.floor >= recentStart && c.floor <= recentEnd);
// 收集近期范围需要配对的楼层
const recentContextFloors = new Set();
for (const c of recentOrphanCandidates) {
if (c.isL0) continue;
const pairFloor = getContextFloor(c);
if (pairFloor >= 0) recentContextFloors.add(pairFloor);
}
// 批量获取(复用已有的 or 新获取)
let recentContextChunksByFloor = new Map();
if (chatId && recentContextFloors.size > 0) {
// 过滤掉已经获取过的
const newFloors = Array.from(recentContextFloors).filter(f => !contextChunksByFloor.has(f));
if (newFloors.length > 0) {
try {
const newContextChunks = await getChunksByFloors(chatId, newFloors);
for (const pc of newContextChunks) {
if (!contextChunksByFloor.has(pc.floor)) {
contextChunksByFloor.set(pc.floor, []);
}
contextChunksByFloor.get(pc.floor).push(pc);
}
} catch (e) {
xbLog.warn(MODULE_ID, "获取近期配对chunks失败", e);
}
}
recentContextChunksByFloor = contextChunksByFloor;
}
const recentOrphans = recentOrphanCandidates
.sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0))); .sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0)));
const recentBudget = { used: 0, max: RECENT_ORPHAN_MAX }; const recentBudget = { used: 0, max: RECENT_ORPHAN_MAX };
let recentContextPairsCount = 0;
for (const c of recentOrphans) { for (const c of recentOrphans) {
const line = formatChunkFullLine(c); // L0 不需要配对
if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) break; if (c.isL0) {
const line = formatChunkFullLine(c);
if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) break;
recentOrphanStats.injected++;
continue;
}
// 获取配对chunk
const pairFloor = getContextFloor(c);
const candidates = recentContextChunksByFloor.get(pairFloor) || [];
const contextChunk = pickContextChunk(candidates, c);
// 格式化(带配对)
const formattedLines = formatChunkWithContext(c, contextChunk);
// 尝试添加所有行
let allAdded = true;
for (const line of formattedLines) {
if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) {
allAdded = false;
break;
}
}
if (!allAdded) break;
recentOrphanStats.injected++; recentOrphanStats.injected++;
if (contextChunk) recentContextPairsCount++;
} }
assembled.recentOrphans.tokens = recentBudget.used; assembled.recentOrphans.tokens = recentBudget.used;
recentOrphanStats.tokens = recentBudget.used; recentOrphanStats.tokens = recentBudget.used;
recentOrphanStats.floorRange = `${recentStart + 1}~${recentEnd + 1}`; recentOrphanStats.floorRange = `${recentStart + 1}~${recentEnd + 1}`;
recentOrphanStats.contextPairs = recentContextPairsCount;
} }
// ═══════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════
// ═══════════════════════════════════════════════════════════════════════ // 按注入顺序拼接 sections
// 按注入顺序拼接 sections // ═══════════════════════════════════════════════════════════════════
// ═══════════════════════════════════════════════════════════════════════ const sections = [];
const sections = []; // 1. 世界约束 → 定了的事
// 1. 世界约束 → 定了的事 if (assembled.facts.lines.length) {
if (assembled.facts.lines.length) { sections.push(`[定了的事] 已确立的事实\n${assembled.facts.lines.join("\n")}`);
sections.push(`[定了的事] 已确立的事实\n${assembled.facts.lines.join("\n")}`); }
} // 2. 核心经历 → 印象深的事
// 2. 核心经历 → 印象深的事 if (assembled.events.direct.length) {
if (assembled.events.direct.length) { sections.push(`[印象深的事] 记得很清楚\n\n${assembled.events.direct.join("\n\n")}`);
sections.push(`[印象深的事] 记得很清楚\n\n${assembled.events.direct.join("\n\n")}`); }
} // 3. 过往背景 → 好像有关的事
// 3. 过往背景 → 好像有关的事 if (assembled.events.similar.length) {
if (assembled.events.similar.length) { sections.push(`[好像有关的事] 听说过或有点模糊\n\n${assembled.events.similar.join("\n\n")}`);
sections.push(`[好像有关的事] 听说过或有点模糊\n\n${assembled.events.similar.join("\n\n")}`); }
} // 4. 远期片段 → 更早以前
// 4. 远期片段 → 更早以前 if (assembled.orphans.lines.length) {
if (assembled.orphans.lines.length) { sections.push(`[更早以前] 记忆里残留的老画面\n${assembled.orphans.lines.join("\n")}`);
sections.push(`[更早以前] 记忆里残留的老画面\n${assembled.orphans.lines.join("\n")}`); }
} // 5. 待整理 → 近期
// 5. 待整理 → 刚发生的 if (assembled.recentOrphans.lines.length) {
if (assembled.recentOrphans.lines.length) { sections.push(`[近期] 清晰但还没整理\n${assembled.recentOrphans.lines.join("\n")}`);
sections.push(`[刚发生的] 还没来得及想明白\n${assembled.recentOrphans.lines.join("\n")}`); }
} // 6. 人物弧光 → 这些人
// 6. 人物弧光 → 这些人 if (assembled.arcs.lines.length) {
if (assembled.arcs.lines.length) { sections.push(`[这些人] 他们的弧光\n${assembled.arcs.lines.join("\n")}`);
sections.push(`[这些人] 他们现在怎样了\n${assembled.arcs.lines.join("\n")}`); }
}
if (!sections.length) { if (!sections.length) {
return { promptText: "", injectionLogText: "", injectionStats }; return { promptText: "", injectionLogText: "", injectionStats };
} }
@@ -846,3 +1032,4 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) {
return { text: finalText, logText: (recallResult.logText || "") + (injectionLogText || "") }; return { text: finalText, logText: (recallResult.logText || "") + (injectionLogText || "") };
} }

View File

@@ -54,7 +54,7 @@ import {
fetchOnlineModels, fetchOnlineModels,
isLocalModelLoaded, isLocalModelLoaded,
DEFAULT_LOCAL_MODEL, DEFAULT_LOCAL_MODEL,
} from "./vector/embedder.js"; } from "./vector/utils/embedder.js";
import { import {
getMeta, getMeta,
@@ -66,7 +66,7 @@ import {
saveChunks, saveChunks,
saveChunkVectors, saveChunkVectors,
getStorageStats, getStorageStats,
} from "./vector/chunk-store.js"; } from "./vector/storage/chunk-store.js";
import { import {
buildIncrementalChunks, buildIncrementalChunks,
@@ -75,12 +75,12 @@ import {
syncOnMessageDeleted, syncOnMessageDeleted,
syncOnMessageSwiped, syncOnMessageSwiped,
syncOnMessageReceived, syncOnMessageReceived,
} from "./vector/chunk-builder.js"; } from "./vector/pipeline/chunk-builder.js";
import { initStateIntegration, rebuildStateVectors } from "./vector/state-integration.js"; import { initStateIntegration, rebuildStateVectors } from "./vector/pipeline/state-integration.js";
import { clearStateVectors, getStateAtomsCount, getStateVectorsCount } from "./vector/state-store.js"; import { clearStateVectors, getStateAtomsCount, getStateVectorsCount } from "./vector/storage/state-store.js";
// vector io // vector io
import { exportVectors, importVectors } from "./vector/vector-io.js"; import { exportVectors, importVectors } from "./vector/storage/vector-io.js";
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// 常量 // 常量

View File

@@ -3,7 +3,7 @@
// 标准 RAG chunking: ~200 tokens per chunk // 标准 RAG chunking: ~200 tokens per chunk
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../extensions.js'; import { getContext } from '../../../../../../../extensions.js';
import { import {
getMeta, getMeta,
updateMeta, updateMeta,
@@ -15,10 +15,10 @@ import {
makeChunkId, makeChunkId,
hashText, hashText,
CHUNK_MAX_TOKENS, CHUNK_MAX_TOKENS,
} from './chunk-store.js'; } from '../storage/chunk-store.js';
import { embed, getEngineFingerprint } from './embedder.js'; import { embed, getEngineFingerprint } from '../utils/embedder.js';
import { xbLog } from '../../../core/debug-core.js'; import { xbLog } from '../../../../core/debug-core.js';
import { filterText } from './text-filter.js'; import { filterText } from '../utils/text-filter.js';
const MODULE_ID = 'chunk-builder'; const MODULE_ID = 'chunk-builder';
@@ -339,7 +339,7 @@ export async function syncOnMessageReceived(chatId, lastFloor, message, vectorCo
// 本地模型未加载时跳过(避免意外触发下载或报错) // 本地模型未加载时跳过(避免意外触发下载或报错)
if (vectorConfig.engine === "local") { if (vectorConfig.engine === "local") {
const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("./embedder.js"); const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("../utils/embedder.js");
const modelId = vectorConfig.local?.modelId || DEFAULT_LOCAL_MODEL; const modelId = vectorConfig.local?.modelId || DEFAULT_LOCAL_MODEL;
if (!isLocalModelLoaded(modelId)) return; if (!isLocalModelLoaded(modelId)) return;
} }

View File

@@ -3,8 +3,8 @@
// 事件监听 + 回滚钩子注册 // 事件监听 + 回滚钩子注册
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../extensions.js'; import { getContext } from '../../../../../../../extensions.js';
import { xbLog } from '../../../core/debug-core.js'; import { xbLog } from '../../../../core/debug-core.js';
import { import {
saveStateAtoms, saveStateAtoms,
saveStateVectors, saveStateVectors,
@@ -12,9 +12,9 @@ import {
deleteStateVectorsFromFloor, deleteStateVectorsFromFloor,
getStateAtoms, getStateAtoms,
clearStateVectors, clearStateVectors,
} from './state-store.js'; } from '../storage/state-store.js';
import { embed, getEngineFingerprint } from './embedder.js'; import { embed, getEngineFingerprint } from '../utils/embedder.js';
import { getVectorConfig } from '../data/config.js'; import { getVectorConfig } from '../../data/config.js';
const MODULE_ID = 'state-integration'; const MODULE_ID = 'state-integration';

View File

@@ -3,11 +3,11 @@
// L0 语义锚点召回 + floor bonus + 虚拟 chunk 转换 // L0 语义锚点召回 + floor bonus + 虚拟 chunk 转换
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
import { getContext } from '../../../../../../extensions.js'; import { getContext } from '../../../../../../../extensions.js';
import { getAllStateVectors, getStateAtoms } from './state-store.js'; import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js';
import { getMeta } from './chunk-store.js'; import { getMeta } from '../storage/chunk-store.js';
import { getEngineFingerprint } from './embedder.js'; import { getEngineFingerprint } from '../utils/embedder.js';
import { xbLog } from '../../../core/debug-core.js'; import { xbLog } from '../../../../core/debug-core.js';
const MODULE_ID = 'state-recall'; const MODULE_ID = 'state-recall';

View File

@@ -1,4 +1,4 @@
// Story Summary - Recall Engine // Story Summary - Recall Engine
// L1 chunk + L2 event 召回 // L1 chunk + L2 event 召回
// - 全量向量打分 // - 全量向量打分
// - 实体权重归一化分配 // - 实体权重归一化分配
@@ -8,19 +8,19 @@
// - MMR 去重(融合后执行) // - MMR 去重(融合后执行)
// - floor 稀疏去重 // - floor 稀疏去重
import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js'; import { getAllChunks, getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from '../storage/chunk-store.js';
import { embed, getEngineFingerprint } from './embedder.js'; import { embed, getEngineFingerprint } from '../utils/embedder.js';
import { xbLog } from '../../../core/debug-core.js'; import { xbLog } from '../../../../core/debug-core.js';
import { getContext } from '../../../../../../extensions.js'; import { getContext } from '../../../../../../../extensions.js';
import { getSummaryStore, getFacts, getNewCharacters, isRelationFact } from '../data/store.js'; import { getSummaryStore, getFacts, getNewCharacters, isRelationFact } from '../../data/store.js';
import { filterText } from './text-filter.js'; import { filterText } from '../utils/text-filter.js';
import { import {
searchStateAtoms, searchStateAtoms,
buildL0FloorBonus, buildL0FloorBonus,
stateToVirtualChunks, stateToVirtualChunks,
mergeAndSparsify, mergeAndSparsify,
} from './state-recall.js'; } from '../pipeline/state-recall.js';
import { ensureEventTextIndex, searchEventsByText } from './text-search.js'; import { ensureEventTextIndex, searchEventsByText, ensureChunkTextIndex, searchChunksByText } from './text-search.js';
import { import {
extractRareTerms, extractRareTerms,
extractNounsFromFactsO, extractNounsFromFactsO,
@@ -29,10 +29,8 @@ import {
const MODULE_ID = 'recall'; const MODULE_ID = 'recall';
const CONFIG = { const CONFIG = {
QUERY_MSG_COUNT: 5, QUERY_MSG_COUNT: 3,
QUERY_DECAY_BETA: 0.7, QUERY_DECAY_BETA: 0.6,
QUERY_MAX_CHARS: 600,
QUERY_CONTEXT_CHARS: 240,
CAUSAL_CHAIN_MAX_DEPTH: 10, CAUSAL_CHAIN_MAX_DEPTH: 10,
CAUSAL_INJECT_MAX: 30, CAUSAL_INJECT_MAX: 30,
@@ -216,11 +214,26 @@ function extractRelationTarget(p) {
return ''; return '';
} }
function buildExpDecayWeights(n, beta) { function buildContentAwareWeights(segments, beta = 0.6) {
const n = segments.length;
if (n === 0) return [];
if (n === 1) return [1.0];
const last = n - 1; const last = n - 1;
const w = Array.from({ length: n }, (_, i) => Math.exp(beta * (i - last))); const SHORT_THRESHOLD = 15;
const sum = w.reduce((a, b) => a + b, 0) || 1; const raw = [];
return w.map(x => x / sum);
for (let i = 0; i < n; i++) {
const posWeight = Math.exp(beta * (i - last));
const len = String(segments[i] || '').replace(/\s+/g, '').length;
const contentFactor = len >= SHORT_THRESHOLD
? 1.0
: Math.max(0.3, Math.sqrt(len / SHORT_THRESHOLD));
raw.push(posWeight * contentFactor);
}
const sum = raw.reduce((a, b) => a + b, 0) || 1;
return raw.map(w => w / sum);
} }
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
@@ -247,19 +260,16 @@ function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = nul
} }
} }
return messages.slice(-count).map((m, idx, arr) => { return messages.slice(-count)
const speaker = m.name || (m.is_user ? (name1 || "用户") : "角色"); .map((m) => cleanForRecall(m.mes) || '')
const clean = cleanForRecall(m.mes); .filter(Boolean);
if (!clean) return '';
const limit = idx === arr.length - 1 ? CONFIG.QUERY_MAX_CHARS : CONFIG.QUERY_CONTEXT_CHARS;
return `${speaker}: ${clean.slice(0, limit)}`;
}).filter(Boolean);
} }
async function embedWeightedQuery(segments, vectorConfig) { async function embedWeightedQuery(segments, vectorConfig) {
if (!segments?.length) return null; if (!segments?.length) return null;
const weights = buildExpDecayWeights(segments.length, CONFIG.QUERY_DECAY_BETA); const weights = buildContentAwareWeights(segments, CONFIG.QUERY_DECAY_BETA);
const vecs = await embed(segments, vectorConfig); const vecs = await embed(segments, vectorConfig);
const dims = vecs?.[0]?.length || 0; const dims = vecs?.[0]?.length || 0;
if (!dims) return null; if (!dims) return null;
@@ -377,19 +387,6 @@ function expandByFacts(presentEntities, facts, maxDepth = 2) {
// 实体权重归一化(用于加分分配) // 实体权重归一化(用于加分分配)
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
/**
 * Rescale a map of entity weights so that the values add up to 1.
 *
 * @param {Map<string, number>} queryEntityWeights - entity name -> raw weight.
 * @returns {Map<string, number>} A new map with each weight divided by the
 *   total. An empty map is returned when the input is missing or empty, or
 *   when the total weight is not strictly positive.
 */
function normalizeEntityWeights(queryEntityWeights) {
    const isEmpty = !queryEntityWeights?.size;
    if (isEmpty) {
        return new Map();
    }

    // Accumulate in iteration order, starting from 0.
    let sum = 0;
    for (const value of queryEntityWeights.values()) {
        sum = sum + value;
    }
    if (sum <= 0) {
        return new Map();
    }

    // The input map is left untouched; shares are written to a fresh map.
    return new Map(
        Array.from(queryEntityWeights, ([name, value]) => [name, value / sum]),
    );
}
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// 文本路 Query 构建(分层高信号词) // 文本路 Query 构建(分层高信号词)
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
@@ -548,7 +545,167 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
// L1 Chunks 检索 // L1 Chunks 检索
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1) { async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntitySet, l0FloorBonus = new Map()) {
const { chatId } = getContext();
if (!chatId || !queryVector?.length) return [];
const meta = await getMeta(chatId);
const fp = getEngineFingerprint(vectorConfig);
if (meta.fingerprint && meta.fingerprint !== fp) return [];
const eventVectors = await getAllEventVectors(chatId);
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
if (!vectorMap.size) return [];
// 构建/更新文本索引
const revision = `${chatId}:${store?.updatedAt || 0}:${allEvents.length}`;
ensureEventTextIndex(allEvents, revision);
// 文本路检索
const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
const textGapInfo = textRanked._gapInfo || null;
// 向量路检索
const scored = (allEvents || []).map((event, idx) => {
const v = vectorMap.get(event.id);
const rawSim = v ? cosineSimilarity(queryVector, v) : 0;
let bonus = 0;
// L0 加权
const range = parseFloorRange(event.summary);
if (range) {
for (let f = range.start; f <= range.end; f++) {
if (l0FloorBonus.has(f)) {
bonus += l0FloorBonus.get(f);
break;
}
}
}
const participants = (event.participants || []).map(p => normalize(p));
const hasPresent = participants.some(p => queryEntitySet.has(p));
return {
_id: event.id,
_idx: idx,
event,
rawSim,
finalScore: rawSim + bonus,
vector: v,
_hasPresent: hasPresent,
};
});
const rawSimById = new Map(scored.map(s => [s._id, s.rawSim]));
const hasPresentById = new Map(scored.map(s => [s._id, s._hasPresent]));
const preFilterDistribution = {
total: scored.length,
'0.85+': scored.filter(s => s.finalScore >= 0.85).length,
'0.7-0.85': scored.filter(s => s.finalScore >= 0.7 && s.finalScore < 0.85).length,
'0.6-0.7': scored.filter(s => s.finalScore >= 0.6 && s.finalScore < 0.7).length,
'0.5-0.6': scored.filter(s => s.finalScore >= 0.5 && s.finalScore < 0.6).length,
'<0.5': scored.filter(s => s.finalScore < 0.5).length,
passThreshold: scored.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT).length,
threshold: CONFIG.MIN_SIMILARITY_EVENT,
};
const candidates = scored
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
.sort((a, b) => b.finalScore - a.finalScore)
.slice(0, CONFIG.CANDIDATE_EVENTS);
const vectorRanked = candidates.map(s => ({
event: s.event,
similarity: s.finalScore,
rawSim: s.rawSim,
vector: s.vector,
}));
const eventById = new Map(allEvents.map(e => [e.id, e]));
const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);
// TEXT-only 质量门槛
const textOnlyStats = {
total: 0,
passedSoftCheck: 0,
filtered: 0,
};
const filtered = fused.filter(x => {
if (x.type !== 'TEXT') return true;
textOnlyStats.total++;
const sim = x.rawSim || rawSimById.get(x.id) || 0;
if (sim >= CONFIG.TEXT_SOFT_MIN_SIM) {
textOnlyStats.passedSoftCheck++;
return true;
}
textOnlyStats.filtered++;
return false;
});
const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
...x,
_id: x.id,
}));
const mmrOutput = mmrSelect(
mmrInput,
CONFIG.MAX_EVENTS,
CONFIG.MMR_LAMBDA,
c => c.vector || null,
c => c.rrf
);
// TEXT-only 限额MMR 后执行)
let textOnlyCount = 0;
let textOnlyTruncated = 0;
const finalResults = mmrOutput.filter(x => {
if (x.type !== 'TEXT') return true;
if (textOnlyCount < CONFIG.TEXT_TOTAL_MAX) {
textOnlyCount++;
return true;
}
textOnlyTruncated++;
return false;
});
textOnlyStats.finalIncluded = textOnlyCount;
textOnlyStats.truncatedByLimit = textOnlyTruncated;
const results = finalResults.map(x => ({
event: x.event,
similarity: x.rrf,
_recallType: hasPresentById.get(x.event?.id) ? 'DIRECT' : 'SIMILAR',
_recallReason: x.type,
_rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
_rawSim: rawSimById.get(x.event?.id) || 0,
}));
if (results.length > 0) {
results[0]._preFilterDistribution = preFilterDistribution;
results[0]._rrfStats = {
vectorCount: vectorRanked.length,
textCount: textRanked.length,
hybridCount: fused.filter(x => x.type === 'HYBRID').length,
vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
textOnlyTotal: textOnlyStats.total,
};
results[0]._textOnlyStats = textOnlyStats;
results[0]._textGapInfo = textGapInfo;
}
return results;
}
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1, textSearchParams = null) {
const { chatId } = getContext(); const { chatId } = getContext();
if (!chatId || !queryVector?.length) return []; if (!chatId || !queryVector?.length) return [];
@@ -577,6 +734,58 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
}; };
}); });
// 文本路补充(仅待整理区)
let textL1Stats = null;
const store = getSummaryStore();
const keepVisible = store?.keepVisibleCount ?? 3;
const recentStart = lastSummarizedFloor + 1;
const recentEnd = (meta?.lastChunkFloor ?? -1) - keepVisible;
if (textSearchParams && recentEnd >= recentStart && recentEnd >= 0) {
const { queryEntities, rareTerms } = textSearchParams;
const textQuery = [...(queryEntities || []), ...(rareTerms || [])].join(' ');
if (textQuery.trim()) {
const allChunks = await getAllChunks(chatId);
const recentChunks = allChunks.filter(c => c.floor >= recentStart && c.floor <= recentEnd);
if (recentChunks.length > 0) {
const revision = `${chatId}:chunk:${recentEnd}`;
ensureChunkTextIndex(recentChunks, revision);
const textHits = searchChunksByText(textQuery, recentStart, recentEnd, 20);
textL1Stats = {
range: `${recentStart + 1}~${recentEnd + 1}`,
candidates: recentChunks.length,
hits: textHits.length,
};
for (const hit of textHits) {
const existingIdx = scored.findIndex(s => s.chunkId === hit.chunkId);
if (existingIdx >= 0) {
scored[existingIdx]._hasTextHit = true;
scored[existingIdx]._textRank = hit.textRank;
} else {
scored.push({
_id: hit.chunkId,
chunkId: hit.chunkId,
floor: hit.floor,
chunkIdx: 0,
similarity: CONFIG.MIN_SIMILARITY_CHUNK_RECENT,
_baseSimilarity: 0,
_l0Bonus: 0,
_recallReason: 'TEXT_L1',
_textRank: hit.textRank,
vector: null,
});
}
}
}
}
}
const candidates = scored const candidates = scored
.filter(s => { .filter(s => {
const threshold = s.floor > lastSummarizedFloor const threshold = s.floor > lastSummarizedFloor
@@ -599,6 +808,7 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
'0.55-0.6': scored.filter(s => s.similarity >= 0.55 && s.similarity < 0.6).length, '0.55-0.6': scored.filter(s => s.similarity >= 0.55 && s.similarity < 0.6).length,
'<0.55': scored.filter(s => s.similarity < 0.55).length, '<0.55': scored.filter(s => s.similarity < 0.55).length,
}, },
textL1: textL1Stats,
}; };
const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length); const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length);
@@ -636,6 +846,8 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
isUser: chunk.isUser, isUser: chunk.isUser,
text: chunk.text, text: chunk.text,
similarity: item.similarity, similarity: item.similarity,
_recallReason: item._recallReason,
_textRank: item._textRank,
}; };
}).filter(Boolean); }).filter(Boolean);
@@ -646,184 +858,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
return results; return results;
} }
// ═══════════════════════════════════════════════════════════════════════════
// L2 Events 检索RRF 混合 + MMR 后置)
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Recall events for the current chat by combining a vector ranking with a
 * text ranking, then diversifying and capping the result.
 *
 * Steps, in order:
 *   1. Bail out with [] when there is no chat id, no query vector, the stored
 *      engine fingerprint differs from the current one, or no event vectors exist.
 *   2. Refresh the event text index and run the text search.
 *   3. Score every event: cosine similarity + L0 floor bonus + entity bonus.
 *   4. Keep events above CONFIG.MIN_SIMILARITY_EVENT (top CONFIG.CANDIDATE_EVENTS).
 *   5. Fuse the vector and text rankings with fuseEventsByRRF.
 *   6. Drop TEXT-only hits whose raw similarity is below CONFIG.TEXT_SOFT_MIN_SIM.
 *   7. Select up to CONFIG.MAX_EVENTS with mmrSelect, then cap TEXT-only hits
 *      at CONFIG.TEXT_TOTAL_MAX.
 *
 * @param {number[]} queryVector - embedding of the query.
 * @param {string} queryTextForSearch - query string for the text route.
 * @param {Array<object>} allEvents - events; `id`, `summary`, `participants` are read here.
 * @param {object} vectorConfig - passed to getEngineFingerprint.
 * @param {object} store - only `store.updatedAt` is read (for the index revision key).
 * @param {Map<string, number>} normalizedEntityWeights - normalized entity -> weight.
 * @param {Map<number, number>} [l0FloorBonus] - floor -> bonus added to the score.
 * @returns {Promise<Array<object>>} Result records; diagnostic statistics are
 *   attached as extra `_`-prefixed properties on the first record only.
 */
async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, normalizedEntityWeights, l0FloorBonus = new Map()) {
const { chatId } = getContext();
if (!chatId || !queryVector?.length) return [];
const meta = await getMeta(chatId);
const fp = getEngineFingerprint(vectorConfig);
// Stored vectors were produced under a different fingerprint: skip recall.
if (meta.fingerprint && meta.fingerprint !== fp) return [];
const eventVectors = await getAllEventVectors(chatId);
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
if (!vectorMap.size) return [];
// Build / refresh the text index; the revision key changes with chat id,
// store.updatedAt and the number of events.
const revision = `${chatId}:${store?.updatedAt || 0}:${allEvents.length}`;
ensureEventTextIndex(allEvents, revision);
// Text-route retrieval
const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
const textGapInfo = textRanked._gapInfo || null;
// ---------------------------------------------------------------------------
// Vector-route retrieval (original note: "only the L0 weighting is kept";
// an entity bonus is nevertheless still added below)
// ---------------------------------------------------------------------------
// Upper bound of the entity bonus; scaled by the best participant weight.
const ENTITY_BONUS_POOL = 0.10;
const scored = (allEvents || []).map((event, idx) => {
const v = vectorMap.get(event.id);
// Events without a stored vector get a raw similarity of 0.
const rawSim = v ? cosineSimilarity(queryVector, v) : 0;
let bonus = 0;
// L0 weighting: add the bonus of the first floor in the event's range that has one.
const range = parseFloorRange(event.summary);
if (range) {
for (let f = range.start; f <= range.end; f++) {
if (l0FloorBonus.has(f)) {
bonus += l0FloorBonus.get(f);
break;
}
}
}
// Entity bonus: driven by the single highest-weighted participant.
const participants = (event.participants || []).map(p => normalize(p));
let maxEntityWeight = 0;
for (const p of participants) {
const w = normalizedEntityWeights.get(p) || 0;
if (w > maxEntityWeight) {
maxEntityWeight = w;
}
}
const entityBonus = ENTITY_BONUS_POOL * maxEntityWeight;
bonus += entityBonus;
return {
_id: event.id,
_idx: idx,
event,
rawSim,
finalScore: rawSim + bonus,
vector: v,
_entityBonus: entityBonus,
_hasPresent: maxEntityWeight > 0,
};
});
// Per-event lookups used after fusion, where only the event id is carried along.
const rawSimById = new Map(scored.map(s => [s._id, s.rawSim]));
const entityBonusById = new Map(scored.map(s => [s._id, s._entityBonus]));
const hasPresentById = new Map(scored.map(s => [s._id, s._hasPresent]));
// Histogram of final scores before thresholding (diagnostics only).
const preFilterDistribution = {
total: scored.length,
'0.85+': scored.filter(s => s.finalScore >= 0.85).length,
'0.7-0.85': scored.filter(s => s.finalScore >= 0.7 && s.finalScore < 0.85).length,
'0.6-0.7': scored.filter(s => s.finalScore >= 0.6 && s.finalScore < 0.7).length,
'0.5-0.6': scored.filter(s => s.finalScore >= 0.5 && s.finalScore < 0.6).length,
'<0.5': scored.filter(s => s.finalScore < 0.5).length,
passThreshold: scored.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT).length,
threshold: CONFIG.MIN_SIMILARITY_EVENT,
};
// Vector candidates: above threshold, best first, capped.
const candidates = scored
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
.sort((a, b) => b.finalScore - a.finalScore)
.slice(0, CONFIG.CANDIDATE_EVENTS);
const vectorRanked = candidates.map(s => ({
event: s.event,
similarity: s.finalScore,
rawSim: s.rawSim,
vector: s.vector,
}));
const eventById = new Map(allEvents.map(e => [e.id, e]));
const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);
// Counters for hits that came from the text route only (type 'TEXT').
const textOnlyStats = {
total: 0,
passedSoftCheck: 0,
filtered: 0,
finalIncluded: 0,
truncatedByLimit: 0,
};
// Soft quality gate: a TEXT-only hit must still reach TEXT_SOFT_MIN_SIM raw similarity.
const filtered = fused.filter(x => {
if (x.type !== 'TEXT') return true;
textOnlyStats.total++;
// Fall back to the per-id lookup when the fused item carries no raw similarity.
const sim = x.rawSim || rawSimById.get(x.id) || 0;
if (sim >= CONFIG.TEXT_SOFT_MIN_SIM) {
textOnlyStats.passedSoftCheck++;
return true;
}
textOnlyStats.filtered++;
return false;
});
// mmrSelect input: copy each item and expose its id as `_id`.
const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
...x,
_id: x.id,
}));
// Selection runs after fusion and is scored by the fused `rrf` value.
const mmrOutput = mmrSelect(
mmrInput,
CONFIG.MAX_EVENTS,
CONFIG.MMR_LAMBDA,
c => c.vector || null,
c => c.rrf
);
// Cap on TEXT-only hits, applied after the mmrSelect pass.
let textOnlyCount = 0;
const finalResults = mmrOutput.filter(x => {
if (x.type !== 'TEXT') return true;
if (textOnlyCount < CONFIG.TEXT_TOTAL_MAX) {
textOnlyCount++;
return true;
}
textOnlyStats.truncatedByLimit++;
return false;
});
textOnlyStats.finalIncluded = textOnlyCount;
// Shape the output; 'DIRECT' marks events with a participant that carries a positive entity weight.
const results = finalResults.map(x => ({
event: x.event,
similarity: x.rrf,
_recallType: hasPresentById.get(x.event?.id) ? 'DIRECT' : 'SIMILAR',
_recallReason: x.type,
_rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
_entityBonus: entityBonusById.get(x.event?.id) || 0,
_rawSim: rawSimById.get(x.event?.id) || 0,
}));
// Statistics are attached to the first result only (nothing is attached when there are no results).
if (results.length > 0) {
results[0]._preFilterDistribution = preFilterDistribution;
results[0]._rrfStats = {
vectorCount: vectorRanked.length,
textCount: textRanked.length,
hybridCount: fused.filter(x => x.type === 'HYBRID').length,
vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
textOnlyTotal: textOnlyStats.total,
};
results[0]._textOnlyStats = textOnlyStats;
results[0]._textGapInfo = textGapInfo;
}
return results;
}
// ═══════════════════════════════════════════════════════════════════════════
// 日志
// ═══════════════════════════════════════════════════════════════════════════
function formatRecallLog({ function formatRecallLog({
elapsed, elapsed,
segments, segments,
@@ -831,7 +865,7 @@ function formatRecallLog({
chunkResults, chunkResults,
eventResults, eventResults,
allEvents, allEvents,
normalizedEntityWeights = new Map(), queryEntities = [],
causalEvents = [], causalEvents = [],
chunkPreFilterStats = null, chunkPreFilterStats = null,
l0Results = [], l0Results = [],
@@ -840,15 +874,15 @@ function formatRecallLog({
textQueryBreakdown = null, textQueryBreakdown = null,
}) { }) {
const lines = [ const lines = [
'\u2554' + '\u2550'.repeat(62) + '\u2557', '' + ''.repeat(62) + '',
'\u2551 记忆召回报告 \u2551', ' 记忆召回报告 ',
'\u2560' + '\u2550'.repeat(62) + '\u2563', '' + ''.repeat(62) + '',
`\u2551 耗时: ${elapsed}ms`, ` 耗时: ${elapsed}ms`,
'\u255a' + '\u2550'.repeat(62) + '\u255d', '' + ''.repeat(62) + '',
'', '',
'\u250c' + '\u2500'.repeat(61) + '\u2510', '' + ''.repeat(61) + '',
'\u2502 【查询构建】最近 5 条消息,指数衰减加权 (β=0.7) \u2502', ` 【查询构建】最近 ${CONFIG.QUERY_MSG_COUNT} 条,内容感知加权 (β=${CONFIG.QUERY_DECAY_BETA}) │`,
'\u2514' + '\u2500'.repeat(61) + '\u2518', '' + ''.repeat(61) + '',
]; ];
const segmentsSorted = segments.map((s, i) => ({ const segmentsSorted = segments.map((s, i) => ({
@@ -858,25 +892,19 @@ function formatRecallLog({
})).sort((a, b) => b.weight - a.weight); })).sort((a, b) => b.weight - a.weight);
segmentsSorted.forEach((s, rank) => { segmentsSorted.forEach((s, rank) => {
const bar = '\u2588'.repeat(Math.round(s.weight * 20)); const bar = ''.repeat(Math.round(s.weight * 20));
const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text; const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text;
const marker = rank === 0 ? ' ◀ 主导' : ''; const marker = rank === 0 ? ' ◀ 主导' : '';
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`); lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
}); });
lines.push(''); lines.push('');
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510'); lines.push('' + ''.repeat(61) + '');
lines.push('\u2502 【提取实体】 \u2502'); lines.push(' 【提取实体】 ');
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518'); lines.push('' + ''.repeat(61) + '');
if (normalizedEntityWeights?.size) { if (queryEntities?.length) {
const sorted = Array.from(normalizedEntityWeights.entries()) lines.push(` 焦点: ${queryEntities.slice(0, 8).join('、')}${queryEntities.length > 8 ? ' ...' : ''}`);
.sort((a, b) => b[1] - a[1])
.slice(0, 8);
const formatted = sorted
.map(([e, w]) => `${e}(${(w * 100).toFixed(0)}%)`)
.join(' | ');
lines.push(` ${formatted}`);
} else { } else {
lines.push(' (无)'); lines.push(' (无)');
} }
@@ -885,9 +913,9 @@ function formatRecallLog({
} }
lines.push(''); lines.push('');
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510'); lines.push('' + ''.repeat(61) + '');
lines.push('\u2502 【文本路 Query 构成】 \u2502'); lines.push(' 【文本路 Query 构成】 ');
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518'); lines.push('' + ''.repeat(61) + '');
if (textQueryBreakdown) { if (textQueryBreakdown) {
const bd = textQueryBreakdown; const bd = textQueryBreakdown;
@@ -919,23 +947,9 @@ function formatRecallLog({
} }
lines.push(''); lines.push('');
lines.push(' 实体归一化(用于加分):'); lines.push('┌' + '─'.repeat(61) + '┐');
if (normalizedEntityWeights?.size) { lines.push('│ 【召回统计】 │');
const sorted = Array.from(normalizedEntityWeights.entries()) lines.push('└' + '─'.repeat(61) + '┘');
.sort((a, b) => b[1] - a[1])
.slice(0, 8);
const formatted = sorted
.map(([e, w]) => `${e}(${(w * 100).toFixed(0)}%)`)
.join(' | ');
lines.push(` ${formatted}`);
} else {
lines.push(' (无)');
}
lines.push('');
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
lines.push('\u2502 【召回统计】 \u2502');
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
// L0 // L0
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b); const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
@@ -953,6 +967,11 @@ function formatRecallLog({
const dist = chunkPreFilterStats.distribution || {}; const dist = chunkPreFilterStats.distribution || {};
lines.push(` 全量: ${chunkPreFilterStats.total} 条 | 通过阈值(远期≥${chunkPreFilterStats.thresholdRemote}, 待整理≥${chunkPreFilterStats.thresholdRecent}): ${chunkPreFilterStats.passThreshold} 条 | 最终: ${chunkResults.length}`); lines.push(` 全量: ${chunkPreFilterStats.total} 条 | 通过阈值(远期≥${chunkPreFilterStats.thresholdRemote}, 待整理≥${chunkPreFilterStats.thresholdRecent}): ${chunkPreFilterStats.passThreshold} 条 | 最终: ${chunkResults.length}`);
lines.push(` 匹配度: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0}`); lines.push(` 匹配度: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0}`);
const textL1 = chunkPreFilterStats.textL1;
if (textL1) {
lines.push(` 文本路补充(待整理区): 范围 ${textL1.range}楼 | 候选 ${textL1.candidates} 条 | 命中 ${textL1.hits}`);
}
} else { } else {
lines.push(` 选入: ${chunkResults.length}`); lines.push(` 选入: ${chunkResults.length}`);
} }
@@ -988,9 +1007,6 @@ function formatRecallLog({
lines.push(` ${i + 1}. [${id}] ${title.padEnd(25)} sim=${sim} tRank=${tRank}`); lines.push(` ${i + 1}. [${id}] ${title.padEnd(25)} sim=${sim} tRank=${tRank}`);
}); });
} }
const entityBoostedEvents = eventResults.filter(e => e._entityBonus > 0).length;
lines.push('');
lines.push(` 实体加分事件: ${entityBoostedEvents}`);
if (textGapInfo) { if (textGapInfo) {
lines.push(''); lines.push('');
@@ -1002,7 +1018,6 @@ function formatRecallLog({
} }
} }
// Causal
if (causalEvents.length) { if (causalEvents.length) {
const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0)); const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0));
const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0)); const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0));
@@ -1012,13 +1027,8 @@ function formatRecallLog({
} }
lines.push(''); lines.push('');
return lines.join('\n'); return lines.join("\n");
} }
// ═══════════════════════════════════════════════════════════════════════════
// 主入口
// ═══════════════════════════════════════════════════════════════════════════
export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) { export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) {
const T0 = performance.now(); const T0 = performance.now();
const { chat } = getContext(); const { chat } = getContext();
@@ -1049,9 +1059,9 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
const lexicon = buildEntityLexicon(store, allEvents); const lexicon = buildEntityLexicon(store, allEvents);
const queryEntityWeights = extractEntitiesWithWeights(segments, weights, lexicon); const queryEntityWeights = extractEntitiesWithWeights(segments, weights, lexicon);
const queryEntities = Array.from(queryEntityWeights.keys()); const queryEntities = Array.from(queryEntityWeights.keys());
const queryEntitySet = new Set(queryEntities.map(normalize));
const facts = getFacts(store); const facts = getFacts(store);
const expandedTerms = expandByFacts(queryEntities, facts, 2); const expandedTerms = expandByFacts(queryEntities, facts, 2);
const normalizedEntityWeights = normalizeEntityWeights(queryEntityWeights);
let queryTextForSearch = ''; let queryTextForSearch = '';
let textQueryBreakdown = null; let textQueryBreakdown = null;
@@ -1079,8 +1089,11 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
} }
const [chunkResults, eventResults] = await Promise.all([ const [chunkResults, eventResults] = await Promise.all([
searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor), searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor, {
searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, normalizedEntityWeights, l0FloorBonus), queryEntities,
rareTerms: textQueryBreakdown?.rareTerms || [],
}),
searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntitySet, l0FloorBonus),
]); ]);
const chunkPreFilterStats = chunkResults._preFilterStats || null; const chunkPreFilterStats = chunkResults._preFilterStats || null;
@@ -1118,7 +1131,7 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
chunkResults: mergedChunks, chunkResults: mergedChunks,
eventResults, eventResults,
allEvents, allEvents,
normalizedEntityWeights, queryEntities,
causalEvents: causalEventsTruncated, causalEvents: causalEventsTruncated,
chunkPreFilterStats, chunkPreFilterStats,
l0Results, l0Results,
@@ -1149,3 +1162,8 @@ export function buildQueryText(chat, count = 2, excludeLastAi = false) {
return `${speaker}: ${text.slice(0, 500)}`; return `${speaker}: ${text.slice(0, 500)}`;
}).filter(Boolean).join('\n'); }).filter(Boolean).join('\n');
} }

View File

@@ -1,6 +1,6 @@
// text-search.js - 最终版 // text-search.js - 最终版
import MiniSearch from '../../../libs/minisearch.mjs'; import MiniSearch from '../../../../libs/minisearch.mjs';
const STOP_WORDS = new Set([ const STOP_WORDS = new Set([
'的', '了', '是', '在', '和', '与', '或', '但', '而', '却', '的', '了', '是', '在', '和', '与', '或', '但', '而', '却',
@@ -106,7 +106,7 @@ export function ensureEventTextIndex(events, revision) {
* *
* 参考帕累托法则80/20 法则在信息检索中的应用 * 参考帕累托法则80/20 法则在信息检索中的应用
*/ */
function dynamicTopK(scores, coverage = 0.90, minK = 15, maxK = 80) { export function dynamicTopK(scores, coverage = 0.90, minK = 15, maxK = 80) {
if (!scores.length) return 0; if (!scores.length) return 0;
const total = scores.reduce((a, b) => a + b, 0); const total = scores.reduce((a, b) => a + b, 0);
@@ -171,3 +171,67 @@ export function clearEventTextIndex() {
idx = null; idx = null;
lastRevision = null; lastRevision = null;
} }
// ---------------------------------------------------------------------------
// Chunk 文本索引(待整理区 L1 补充)
// ---------------------------------------------------------------------------
// Cached MiniSearch index over chunk text. It is null until
// ensureChunkTextIndex builds it, after a failed build, and after
// clearChunkTextIndex.
let chunkIdx = null;
// Revision key the cached index was built for; ensureChunkTextIndex skips the
// rebuild when the caller passes the same key again.
let chunkIdxRevision = null;
/**
 * Make sure the module-level chunk text index matches `revision`.
 *
 * When an index already exists for the same revision nothing happens.
 * Otherwise a new MiniSearch index is built from `chunks` and cached together
 * with the revision. If building throws, the error is logged and the cached
 * index is reset to null (the cached revision is left as it was).
 *
 * @param {Array<{chunkId: string, floor: number, text?: string}>} chunks
 *   Chunks to index; a missing/empty `text` is indexed as ''.
 * @param {string} revision - cache key identifying this chunk set.
 * @returns {void}
 */
export function ensureChunkTextIndex(chunks, revision) {
    const alreadyBuilt = Boolean(chunkIdx) && chunkIdxRevision === revision;
    if (alreadyBuilt) {
        return;
    }

    // Shape of one indexed document; chunkId doubles as the document id.
    const toIndexDoc = (chunk) => {
        const { chunkId, floor } = chunk;
        return { id: chunkId, chunkId, floor, text: chunk.text || '' };
    };

    try {
        const options = {
            fields: ['text'],
            storeFields: ['chunkId', 'floor'],
            tokenize,
            searchOptions: { tokenize },
        };
        const freshIndex = new MiniSearch(options);
        freshIndex.addAll(chunks.map(toIndexDoc));

        // Publish only after the index has been fully populated.
        chunkIdx = freshIndex;
        chunkIdxRevision = revision;
    } catch (err) {
        console.error('[text-search] Chunk index build failed:', err);
        chunkIdx = null;
    }
}
/**
 * Search the cached chunk text index and return the best hits inside a floor
 * range.
 *
 * The query is run as an exact match (no fuzzy, no prefix). Hits outside
 * [floorMin, floorMax] are discarded, the number of hits to keep is chosen by
 * dynamicTopK(scores, 0.85, 5, limit), and the kept hits are numbered from 1
 * in the order the index returned them.
 *
 * @param {string} query - text query; blank or missing yields [].
 * @param {number} floorMin - inclusive lower floor bound.
 * @param {number} floorMax - inclusive upper floor bound.
 * @param {number} [limit=20] - passed to dynamicTopK as its maximum.
 * @returns {Array<{chunkId: string, floor: number, textRank: number, score: number}>}
 *   Empty when no index is built, nothing matches, or the search throws
 *   (the error is logged).
 */
export function searchChunksByText(query, floorMin, floorMax, limit = 20) {
    if (!chunkIdx) {
        return [];
    }
    if (!query?.trim()) {
        return [];
    }

    try {
        const hits = chunkIdx.search(query, { fuzzy: false, prefix: false });

        // One pass: keep in-range hits and collect their scores alongside.
        const inRange = [];
        const inRangeScores = [];
        for (const hit of hits) {
            const withinFloors = hit.floor >= floorMin && hit.floor <= floorMax;
            if (withinFloors) {
                inRange.push(hit);
                inRangeScores.push(hit.score);
            }
        }
        if (inRange.length === 0) {
            return [];
        }

        const keep = dynamicTopK(inRangeScores, 0.85, 5, limit);
        const ranked = [];
        inRange.slice(0, keep).forEach((hit, position) => {
            ranked.push({
                chunkId: hit.chunkId,
                floor: hit.floor,
                textRank: position + 1,
                score: hit.score,
            });
        });
        return ranked;
    } catch (err) {
        console.error('[text-search] Chunk search failed:', err);
        return [];
    }
}
/**
 * Drop the chunk text index together with the revision it was built for,
 * so the next ensureChunkTextIndex() call rebuilds from scratch.
 *
 * @returns {void}
 */
export function clearChunkTextIndex() {
    chunkIdx = chunkIdxRevision = null;
}

View File

@@ -1,5 +1,5 @@
import { xbLog } from '../../../core/debug-core.js'; import { xbLog } from '../../../../core/debug-core.js';
import { extensionFolderPath } from '../../../core/constants.js'; import { extensionFolderPath } from '../../../../core/constants.js';
const MODULE_ID = 'tokenizer'; const MODULE_ID = 'tokenizer';

View File

@@ -8,7 +8,7 @@ import {
chunkVectorsTable, chunkVectorsTable,
eventVectorsTable, eventVectorsTable,
CHUNK_MAX_TOKENS, CHUNK_MAX_TOKENS,
} from '../data/db.js'; } from '../../data/db.js';
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
// 工具函数 // 工具函数

View File

@@ -4,11 +4,11 @@
// StateVector 存 IndexedDB可重建 // StateVector 存 IndexedDB可重建
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
import { saveMetadataDebounced } from '../../../../../../extensions.js'; import { saveMetadataDebounced } from '../../../../../../../extensions.js';
import { chat_metadata } from '../../../../../../../script.js'; import { chat_metadata } from '../../../../../../../../script.js';
import { stateVectorsTable } from '../data/db.js'; import { stateVectorsTable } from '../../data/db.js';
import { EXT_ID } from '../../../core/constants.js'; import { EXT_ID } from '../../../../core/constants.js';
import { xbLog } from '../../../core/debug-core.js'; import { xbLog } from '../../../../core/debug-core.js';
const MODULE_ID = 'state-store'; const MODULE_ID = 'state-store';

View File

@@ -3,9 +3,9 @@
// 向量数据导入导出(当前 chatId 级别) // 向量数据导入导出(当前 chatId 级别)
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
import { zipSync, unzipSync, strToU8, strFromU8 } from '../../../libs/fflate.mjs'; import { zipSync, unzipSync, strToU8, strFromU8 } from '../../../../libs/fflate.mjs';
import { getContext } from '../../../../../../extensions.js'; import { getContext } from '../../../../../../../extensions.js';
import { xbLog } from '../../../core/debug-core.js'; import { xbLog } from '../../../../core/debug-core.js';
import { import {
getMeta, getMeta,
updateMeta, updateMeta,
@@ -26,8 +26,8 @@ import {
saveStateVectors, saveStateVectors,
clearStateVectors, clearStateVectors,
} from './state-store.js'; } from './state-store.js';
import { getEngineFingerprint } from './embedder.js'; import { getEngineFingerprint } from '../utils/embedder.js';
import { getVectorConfig } from '../data/config.js'; import { getVectorConfig } from '../../data/config.js';
const MODULE_ID = 'vector-io'; const MODULE_ID = 'vector-io';
const EXPORT_VERSION = 1; const EXPORT_VERSION = 1;

View File

@@ -3,7 +3,7 @@
// 统一的向量生成接口(本地模型 / 在线服务) // 统一的向量生成接口(本地模型 / 在线服务)
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
import { xbLog } from '../../../core/debug-core.js'; import { xbLog } from '../../../../core/debug-core.js';
const MODULE_ID = 'embedding'; const MODULE_ID = 'embedding';

View File

@@ -3,7 +3,7 @@
// 跳过用户定义的「起始→结束」区间 // 跳过用户定义的「起始→结束」区间
// ═══════════════════════════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════════════════════════
import { getTextFilterRules } from '../data/config.js'; import { getTextFilterRules } from '../../data/config.js';
/** /**
* 转义正则特殊字符 * 转义正则特殊字符