Update story summary recall and prompt
This commit is contained in:
@@ -5,10 +5,11 @@ import { getContext, saveMetadataDebounced } from "../../../../../../extensions.
|
||||
import { chat_metadata } from "../../../../../../../script.js";
|
||||
import { EXT_ID } from "../../../core/constants.js";
|
||||
import { xbLog } from "../../../core/debug-core.js";
|
||||
import { clearEventVectors, deleteEventVectorsByIds } from "../vector/chunk-store.js";
|
||||
import { clearEventTextIndex } from '../vector/text-search.js';
|
||||
import { clearEventVectors, deleteEventVectorsByIds } from "../vector/storage/chunk-store.js";
|
||||
import { clearEventTextIndex } from '../vector/retrieval/text-search.js';
|
||||
|
||||
const MODULE_ID = 'summaryStore';
|
||||
const FACTS_LIMIT_PER_SUBJECT = 10;
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 基础存取
|
||||
@@ -125,32 +126,26 @@ function getNextFactId(existingFacts) {
|
||||
export function mergeFacts(existingFacts, updates, floor) {
|
||||
const map = new Map();
|
||||
|
||||
// 加载现有 facts
|
||||
for (const f of existingFacts || []) {
|
||||
if (!f.retracted) {
|
||||
map.set(factKey(f), f);
|
||||
}
|
||||
}
|
||||
|
||||
// 获取下一个 ID
|
||||
let nextId = getNextFactId(existingFacts);
|
||||
|
||||
// 应用更新
|
||||
for (const u of updates || []) {
|
||||
if (!u.s || !u.p) continue;
|
||||
|
||||
const key = factKey(u);
|
||||
|
||||
// 删除操作
|
||||
if (u.retracted === true) {
|
||||
map.delete(key);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 无 o 则跳过
|
||||
if (!u.o || !String(u.o).trim()) continue;
|
||||
|
||||
// 覆盖或新增
|
||||
const existing = map.get(key);
|
||||
const newFact = {
|
||||
id: existing?.id || `f-${nextId++}`,
|
||||
@@ -158,14 +153,13 @@ export function mergeFacts(existingFacts, updates, floor) {
|
||||
p: u.p.trim(),
|
||||
o: String(u.o).trim(),
|
||||
since: floor,
|
||||
_isState: existing?._isState ?? !!u.isState,
|
||||
};
|
||||
|
||||
// 关系类保留 trend
|
||||
if (isRelationFact(newFact) && u.trend) {
|
||||
newFact.trend = u.trend;
|
||||
}
|
||||
|
||||
// 保留原始 _addedAt(如果是更新)
|
||||
if (existing?._addedAt != null) {
|
||||
newFact._addedAt = existing._addedAt;
|
||||
} else {
|
||||
@@ -175,9 +169,28 @@ export function mergeFacts(existingFacts, updates, floor) {
|
||||
map.set(key, newFact);
|
||||
}
|
||||
|
||||
return Array.from(map.values());
|
||||
const factsBySubject = new Map();
|
||||
for (const f of map.values()) {
|
||||
if (f._isState) continue;
|
||||
const arr = factsBySubject.get(f.s) || [];
|
||||
arr.push(f);
|
||||
factsBySubject.set(f.s, arr);
|
||||
}
|
||||
|
||||
const toRemove = new Set();
|
||||
for (const arr of factsBySubject.values()) {
|
||||
if (arr.length > FACTS_LIMIT_PER_SUBJECT) {
|
||||
arr.sort((a, b) => (a._addedAt || 0) - (b._addedAt || 0));
|
||||
for (let i = 0; i < arr.length - FACTS_LIMIT_PER_SUBJECT; i++) {
|
||||
toRemove.add(factKey(arr[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Array.from(map.values()).filter(f => !toRemove.has(factKey(f)));
|
||||
}
|
||||
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 旧数据迁移
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
@@ -32,7 +32,6 @@ function sanitizeFacts(parsed) {
|
||||
|
||||
if (!s || !pRaw) continue;
|
||||
|
||||
// 删除操作
|
||||
if (item.retracted === true) {
|
||||
ok.push({ s, p: pRaw, retracted: true });
|
||||
continue;
|
||||
@@ -43,11 +42,15 @@ function sanitizeFacts(parsed) {
|
||||
|
||||
const relP = normalizeRelationPredicate(pRaw);
|
||||
const isRel = !!relP;
|
||||
const fact = { s, p: isRel ? relP : pRaw, o };
|
||||
const fact = {
|
||||
s,
|
||||
p: isRel ? relP : pRaw,
|
||||
o,
|
||||
isState: !!item.isState,
|
||||
};
|
||||
|
||||
// 关系类保留 trend
|
||||
if (isRel && item.trend) {
|
||||
const validTrends = ['破裂', '厌恶', '反感', '陌生', '投缘', '亲密', '交融'];
|
||||
const validTrends = ['破裂', '厌恶', '反感', '陌生', '投缘', '亲密', '交融'];
|
||||
if (validTrends.includes(item.trend)) {
|
||||
fact.trend = item.trend;
|
||||
}
|
||||
@@ -59,6 +62,7 @@ function sanitizeFacts(parsed) {
|
||||
parsed.factUpdates = ok;
|
||||
}
|
||||
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// causedBy 清洗(事件因果边)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
@@ -100,14 +100,19 @@ Acknowledged. Now reviewing the incremental summarization specifications:
|
||||
├─ progress: 0.0 to 1.0
|
||||
└─ newMoment: 仅记录本次新增的关键时刻
|
||||
|
||||
[Fact Tracking - SPO Triples]
|
||||
├─ s: 主体(角色名/物品名)
|
||||
├─ p: 谓词(属性名)
|
||||
│ - 关系类只允许:对X的看法 / 与X的关系
|
||||
├─ o: 值(当前状态)
|
||||
├─ trend: 仅关系类填写
|
||||
├─ retracted: 删除标记
|
||||
└─ s+p 为键,相同键会覆盖旧值
|
||||
[Fact Tracking - SPO ???]
|
||||
?? ??: ?? & ???????
|
||||
?? ??: ??????????????????
|
||||
?? SPO ??:
|
||||
? s: ??????/????
|
||||
? p: ??????????????
|
||||
? o: ???
|
||||
?? KV ??: s+p ??????????
|
||||
?? isState ????????:
|
||||
? true = ????????????/??/??/???
|
||||
? false = ??????????????
|
||||
?? trend: ?????????/??/??/??/??/??/???
|
||||
?? retracted: true ???????
|
||||
|
||||
Ready to process incremental summary requests with strict deduplication.`,
|
||||
|
||||
@@ -177,26 +182,28 @@ Before generating, observe the USER and analyze carefully:
|
||||
"arcUpdates": [
|
||||
{"name": "角色名", "trajectory": "当前阶段描述(15字内)", "progress": 0.0-1.0, "newMoment": "本次新增的关键时刻"}
|
||||
],
|
||||
"factUpdates": [
|
||||
"factUpdates": [
|
||||
{
|
||||
"s": "主体(角色名/物品名)",
|
||||
"p": "谓词(属性名/对X的看法)",
|
||||
"s": "主体",
|
||||
"p": "谓词(复用已有谓词,避免同义词)",
|
||||
"o": "当前值",
|
||||
"trend": "破裂|厌恶|反感|陌生|投缘|亲密|交融",
|
||||
"retracted": false
|
||||
"isState": true/false,
|
||||
"trend": "仅关系类:破裂|厌恶|反感|陌生|投缘|亲密|交融"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
\`\`\`
|
||||
|
||||
## factUpdates 规则
|
||||
- 目的: 纠错 & 世界一致性约束,只记录硬性事实
|
||||
- s+p 为键,相同键会覆盖旧值
|
||||
- 状态类:s=角色名, p=属性(生死/位置/状态等), o=值
|
||||
- 关系类:s=角色A, p="对B的看法" 或 p="与B的关系",trend 仅限关系类
|
||||
- 删除:设置 retracted: true(不需要填 o)
|
||||
- 只输出有变化的条目
|
||||
- 硬约束才记录,避免叙事化,确保少、硬、稳定
|
||||
|
||||
- isState: true=核心约束(位置/身份/生死/关系),false=有容量上限会被清理
|
||||
- 关系类: p="对X的看法",trend 必填
|
||||
- 删除: 设置 retracted: true
|
||||
- 谓词规范化: 复用已有谓词,不要发明同义词
|
||||
- 只输出有变化的条目,确保少、硬、稳定
|
||||
## CRITICAL NOTES
|
||||
- events.id 从 evt-{nextEventId} 开始编号
|
||||
- 仅输出【增量】内容,已有事件绝不重复
|
||||
@@ -267,9 +274,11 @@ function waitForStreamingComplete(sessionId, streamingMod, timeout = 120000) {
|
||||
|
||||
function formatFactsForLLM(facts) {
|
||||
if (!facts?.length) {
|
||||
return '(空白,尚无事实记录)';
|
||||
return { text: '(空白,尚无事实记录)', predicates: [] };
|
||||
}
|
||||
|
||||
const predicates = [...new Set(facts.map(f => f.p).filter(Boolean))];
|
||||
|
||||
const lines = facts.map(f => {
|
||||
if (f.trend) {
|
||||
return `- ${f.s} | ${f.p} | ${f.o} [${f.trend}]`;
|
||||
@@ -277,11 +286,18 @@ function formatFactsForLLM(facts) {
|
||||
return `- ${f.s} | ${f.p} | ${f.o}`;
|
||||
});
|
||||
|
||||
return lines.join('\n') || '(空白,尚无事实记录)';
|
||||
return {
|
||||
text: lines.join('\n') || '(空白,尚无事实记录)',
|
||||
predicates,
|
||||
};
|
||||
}
|
||||
|
||||
function buildSummaryMessages(existingSummary, existingFacts, newHistoryText, historyRange, nextEventId, existingEventCount) {
|
||||
const factsText = formatFactsForLLM(existingFacts);
|
||||
const { text: factsText, predicates } = formatFactsForLLM(existingFacts);
|
||||
|
||||
const predicatesHint = predicates.length > 0
|
||||
? `\n\n<\u5df2\u6709\u8c13\u8bcd\uff0c\u8bf7\u590d\u7528>\n${predicates.join('\u3001')}\n</\u5df2\u6709\u8c13\u8bcd\uff0c\u8bf7\u590d\u7528>`
|
||||
: '';
|
||||
|
||||
const jsonFormat = LLM_PROMPT_CONFIG.userJsonFormat
|
||||
.replace(/\{nextEventId\}/g, String(nextEventId));
|
||||
@@ -293,9 +309,9 @@ function buildSummaryMessages(existingSummary, existingFacts, newHistoryText, hi
|
||||
{ role: 'system', content: LLM_PROMPT_CONFIG.topSystem },
|
||||
{ role: 'assistant', content: LLM_PROMPT_CONFIG.assistantDoc },
|
||||
{ role: 'assistant', content: LLM_PROMPT_CONFIG.assistantAskSummary },
|
||||
{ role: 'user', content: `<已有总结状态>\n${existingSummary}\n</已有总结状态>\n\n<当前事实图谱>\n${factsText}\n</当前事实图谱>` },
|
||||
{ role: 'user', content: `<\u5df2\u6709\u603b\u7ed3\u72b6\u6001>\n${existingSummary}\n</\u5df2\u6709\u603b\u7ed3\u72b6\u6001>\n\n<\u5f53\u524d\u4e8b\u5b9e\u56fe\u8c31>\n${factsText}\n</\u5f53\u524d\u4e8b\u5b9e\u56fe\u8c31>${predicatesHint}` },
|
||||
{ role: 'assistant', content: LLM_PROMPT_CONFIG.assistantAskContent },
|
||||
{ role: 'user', content: `<新对话内容>(${historyRange})\n${newHistoryText}\n</新对话内容>` }
|
||||
{ role: 'user', content: `<\u65b0\u5bf9\u8bdd\u5185\u5bb9>\uff08${historyRange}\uff09\n${newHistoryText}\n</\u65b0\u5bf9\u8bdd\u5185\u5bb9>` }
|
||||
];
|
||||
|
||||
const bottomMessages = [
|
||||
@@ -311,6 +327,7 @@ function buildSummaryMessages(existingSummary, existingFacts, newHistoryText, hi
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// JSON 解析
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
@@ -415,4 +432,4 @@ export async function generateSummary(options) {
|
||||
console.groupEnd();
|
||||
|
||||
return rawOutput;
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Story Summary - Prompt Injection (Final Clean Version)
|
||||
// - 仅负责“构建注入文本”,不负责写入 extension_prompts
|
||||
// - 仅负责"构建注入文本",不负责写入 extension_prompts
|
||||
// - 注入发生在 story-summary.js:GENERATION_STARTED 时写入 extension_prompts(IN_CHAT + depth)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
@@ -8,8 +8,8 @@ import { getContext } from "../../../../../../extensions.js";
|
||||
import { xbLog } from "../../../core/debug-core.js";
|
||||
import { getSummaryStore, getFacts, isRelationFact } from "../data/store.js";
|
||||
import { getVectorConfig, getSummaryPanelConfig, getSettings } from "../data/config.js";
|
||||
import { recallMemory, buildQueryText } from "../vector/recall.js";
|
||||
import { getChunksByFloors, getAllChunkVectors, getAllEventVectors, getMeta } from "../vector/chunk-store.js";
|
||||
import { recallMemory, buildQueryText } from "../vector/retrieval/recall.js";
|
||||
import { getChunksByFloors, getAllChunkVectors, getAllEventVectors, getMeta } from "../vector/storage/chunk-store.js";
|
||||
|
||||
const MODULE_ID = "summaryPrompt";
|
||||
|
||||
@@ -85,6 +85,49 @@ function cleanSummary(summary) {
|
||||
.trim();
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// 上下文配对工具函数
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Resolve the floor that holds the conversational counterpart of a chunk.
 *
 * - A user chunk pairs with the next floor (the AI reply).
 * - An AI chunk pairs with the previous floor (the user message).
 * - L0 virtual chunks are never paired.
 *
 * @param {{ isL0?: boolean, isUser?: boolean, floor?: number }} chunk - Chunk to pair.
 * @returns {number} The paired floor index, or -1 when no pairing applies
 *   (L0 chunk, missing chunk, missing/non-numeric floor, or an AI chunk on floor 0).
 */
function getContextFloor(chunk) {
    if (!chunk || chunk.isL0) return -1;

    // Without a usable floor the arithmetic below would yield NaN; report
    // "no pair" through the same -1 sentinel callers already test for.
    const floor = chunk.floor;
    if (!Number.isFinite(floor)) return -1;

    return chunk.isUser ? floor + 1 : floor - 1;
}
|
||||
|
||||
/**
 * Choose the best context (paired) chunk for a main chunk.
 *
 * Strategy: prefer the first candidate spoken by the opposite role of the
 * main chunk; if none exists, fall back to the first candidate.
 *
 * @param {Array<{ isUser?: boolean }>|null|undefined} candidates - Chunks found on the paired floor.
 * @param {{ isUser?: boolean }} mainChunk - The chunk being paired.
 * @returns {object|null} The selected candidate, or null when there are no candidates.
 */
function pickContextChunk(candidates, mainChunk) {
    if (!candidates?.length) return null;

    const targetIsUser = !mainChunk.isUser;

    // Coerce to boolean: a chunk without an `isUser` flag is treated as an AI
    // chunk elsewhere (truthiness checks), so it must match `false` here too.
    const opposite = candidates.find((c) => Boolean(c.isUser) === targetIsUser);

    return opposite ?? candidates[0];
}
|
||||
/**
 * Render a paired context chunk as a single indented line with a direction marker.
 *
 * @param {object} chunk - Context chunk; reads `isUser`, `speaker`, `text` and `floor`.
 * @param {boolean} isAbove - True when the line is shown above the main chunk ("┌"),
 *   false when shown below it ("└").
 * @returns {string} Line containing the marker, the 1-based floor number,
 *   the speaker label and the trimmed chunk text.
 */
function formatContextChunkLine(chunk, isAbove) {
    const ctx = getContext();

    // User chunks use the persona name; other chunks prefer their own speaker label.
    let label;
    if (chunk.isUser) {
        label = ctx.name1 || "用户";
    } else {
        label = chunk.speaker || ctx.name2 || "角色";
    }

    const body = String(chunk.text || "").trim();
    const marker = isAbove ? "┌" : "└";

    return ` ${marker} #${chunk.floor + 1} [${label}] ${body}`;
}
|
||||
|
||||
/**
|
||||
* 格式化配对chunk(缩进,简短摘要)
|
||||
*/
|
||||
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// 系统前导与后缀
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -150,7 +193,31 @@ function formatChunkFullLine(c) {
|
||||
return `› #${c.floor + 1} [${speaker}] ${String(c.text || "").trim()}`;
|
||||
}
|
||||
|
||||
// 因果事件格式(仅作为“前因线索”展示,仍保留楼层提示)
|
||||
/**
 * Format a chunk together with its optional paired context chunk.
 *
 * @param {object} mainChunk - The chunk being injected.
 * @param {object|null|undefined} contextChunk - The paired chunk, if one was found.
 * @returns {string[]} The main line alone when there is no context chunk; otherwise
 *   two lines ordered by floor (context first when it sits on a lower floor,
 *   main first otherwise).
 */
function formatChunkWithContext(mainChunk, contextChunk) {
    const mainLine = formatChunkFullLine(mainChunk);
    if (!contextChunk) return [mainLine];

    // A context chunk from a lower floor is rendered above the main chunk.
    const isAbove = contextChunk.floor < mainChunk.floor;
    const contextLine = formatContextChunkLine(contextChunk, isAbove);

    return isAbove ? [contextLine, mainLine] : [mainLine, contextLine];
}
|
||||
|
||||
// 因果事件格式(仅作为"前因线索"展示,仍保留楼层提示)
|
||||
function formatCausalEventLine(causalItem, causalById) {
|
||||
const ev = causalItem?.event || {};
|
||||
const depth = Math.max(1, Math.min(9, causalItem?._causalDepth || 1));
|
||||
@@ -172,9 +239,8 @@ function formatCausalEventLine(causalItem, causalById) {
|
||||
const evidence = causalItem._evidenceChunk;
|
||||
if (evidence) {
|
||||
const speaker = evidence.speaker || "角色";
|
||||
const preview = String(evidence.text || "");
|
||||
const clip = preview.length > 60 ? preview.slice(0, 60) + "..." : preview;
|
||||
lines.push(`${indent} › #${evidence.floor + 1} [${speaker}] ${clip}`);
|
||||
const text = String(evidence.text || "").trim();
|
||||
lines.push(`${indent} › #${evidence.floor + 1} [${speaker}] ${text}`);
|
||||
}
|
||||
|
||||
return lines.join("\n");
|
||||
@@ -216,11 +282,13 @@ function formatInjectionLog(stats, details, recentOrphanStats = null) {
|
||||
const l1OrphanCount = (stats.orphans.injected || 0) - l0OrphanCount;
|
||||
lines.push(` [3] 远期片段 (已总结范围)`);
|
||||
lines.push(` 选入: ${stats.orphans.injected} 条 (L0: ${l0OrphanCount}, L1: ${l1OrphanCount}) | 消耗: ${stats.orphans.tokens} tokens`);
|
||||
lines.push(` 配对: ${stats.orphans.contextPairs || 0} 条`);
|
||||
lines.push('');
|
||||
|
||||
// [4] 待整理
|
||||
lines.push(` [4] 待整理 (独立预算 5000)`);
|
||||
lines.push(` 选入: ${recentOrphanStats?.injected || 0} 条 | 消耗: ${recentOrphanStats?.tokens || 0} tokens`);
|
||||
lines.push(` 配对: ${recentOrphanStats?.contextPairs || 0} 条`);
|
||||
lines.push(` 楼层: ${recentOrphanStats?.floorRange || 'N/A'}`);
|
||||
lines.push('');
|
||||
|
||||
@@ -248,7 +316,7 @@ function formatInjectionLog(stats, details, recentOrphanStats = null) {
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
// 重写事件文本里的序号前缀:把 “{idx}. ” 或 “{idx}.【...】” 的 idx 替换
|
||||
// 重写事件文本里的序号前缀:把 "{idx}. " 或 "{idx}.【...】" 的 idx 替换
|
||||
function renumberEventText(text, newIndex) {
|
||||
const s = String(text || "");
|
||||
// 匹配行首: "12." 或 "12.【"
|
||||
@@ -325,11 +393,12 @@ export function buildNonVectorPromptText() {
|
||||
return text;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// 向量模式:预算装配(世界 → 事件(带证据) → 碎片 → 弧光)
|
||||
// ─────────────────────────────────────────────────────────────
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
async function buildVectorPrompt(store, recallResult, causalById, queryEntities = [], meta = null) {
|
||||
const { chatId } = getContext();
|
||||
const data = store.json || {};
|
||||
const total = { used: 0, max: MAIN_BUDGET_MAX };
|
||||
|
||||
@@ -351,13 +420,14 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
|
||||
arcs: { count: 0, tokens: 0 },
|
||||
events: { selected: 0, tokens: 0 },
|
||||
evidence: { attached: 0, tokens: 0 },
|
||||
orphans: { injected: 0, tokens: 0 },
|
||||
orphans: { injected: 0, tokens: 0, l0Count: 0, contextPairs: 0 },
|
||||
};
|
||||
|
||||
const recentOrphanStats = {
|
||||
injected: 0,
|
||||
tokens: 0,
|
||||
floorRange: "N/A",
|
||||
contextPairs: 0,
|
||||
};
|
||||
const details = {
|
||||
eventList: [],
|
||||
@@ -473,14 +543,14 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
|
||||
|
||||
const bestChunk = pickBestChunkForEvent(e.event);
|
||||
|
||||
// 先尝试“带证据”
|
||||
// 先尝试"带证据"
|
||||
// idx 先占位写 0,后面统一按时间线重排后再改号
|
||||
let text = formatEventWithEvidence(e, 0, bestChunk);
|
||||
let cost = estimateTokens(text);
|
||||
let hasEvidence = !!bestChunk;
|
||||
let chosenChunk = bestChunk || null;
|
||||
|
||||
// 塞不下就退化成“不带证据”
|
||||
// 塞不下就退化成"不带证据"
|
||||
if (total.used + cost > total.max) {
|
||||
text = formatEventWithEvidence(e, 0, null);
|
||||
cost = estimateTokens(text);
|
||||
@@ -549,33 +619,90 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
|
||||
assembled.events.similar = selectedSimilarTexts;
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// [优先级 4] 远期片段(已总结范围的 orphan chunks)
|
||||
// [优先级 4] 远期片段(已总结范围的 orphan chunks)- 带上下文配对
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
const lastSummarized = store.lastSummarizedMesId ?? -1;
|
||||
const lastChunkFloor = meta?.lastChunkFloor ?? -1;
|
||||
const keepVisible = store.keepVisibleCount ?? 3;
|
||||
|
||||
if (chunks.length && total.used < total.max) {
|
||||
const orphans = chunks
|
||||
.filter(c => !usedChunkIds.has(c.chunkId))
|
||||
.filter(c => c.floor <= lastSummarized)
|
||||
// 收集需要配对的楼层
|
||||
const orphanContextFloors = new Set();
|
||||
const orphanCandidates = chunks
|
||||
.filter(c => !usedChunkIds.has(c.chunkId))
|
||||
.filter(c => c.floor <= lastSummarized);
|
||||
|
||||
for (const c of orphanCandidates) {
|
||||
if (c.isL0) continue;
|
||||
const pairFloor = getContextFloor(c);
|
||||
if (pairFloor >= 0) orphanContextFloors.add(pairFloor);
|
||||
}
|
||||
|
||||
// 批量获取配对楼层的chunks
|
||||
let contextChunksByFloor = new Map();
|
||||
if (chatId && orphanContextFloors.size > 0) {
|
||||
try {
|
||||
const contextChunks = await getChunksByFloors(chatId, Array.from(orphanContextFloors));
|
||||
for (const pc of contextChunks) {
|
||||
if (!contextChunksByFloor.has(pc.floor)) {
|
||||
contextChunksByFloor.set(pc.floor, []);
|
||||
}
|
||||
contextChunksByFloor.get(pc.floor).push(pc);
|
||||
}
|
||||
} catch (e) {
|
||||
xbLog.warn(MODULE_ID, "获取配对chunks失败", e);
|
||||
}
|
||||
}
|
||||
|
||||
if (orphanCandidates.length && total.used < total.max) {
|
||||
const orphans = orphanCandidates
|
||||
.sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0)));
|
||||
|
||||
const l1Budget = { used: 0, max: total.max - total.used };
|
||||
let l0Count = 0;
|
||||
let contextPairsCount = 0;
|
||||
|
||||
for (const c of orphans) {
|
||||
const line = formatChunkFullLine(c);
|
||||
if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) break;
|
||||
// L0 不需要配对
|
||||
if (c.isL0) {
|
||||
const line = formatChunkFullLine(c);
|
||||
if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) break;
|
||||
injectionStats.orphans.injected++;
|
||||
l0Count++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// 获取配对chunk
|
||||
const pairFloor = getContextFloor(c);
|
||||
const candidates = contextChunksByFloor.get(pairFloor) || [];
|
||||
const contextChunk = pickContextChunk(candidates, c);
|
||||
|
||||
// 格式化(带配对)
|
||||
const formattedLines = formatChunkWithContext(c, contextChunk);
|
||||
|
||||
// 尝试添加所有行
|
||||
let allAdded = true;
|
||||
for (const line of formattedLines) {
|
||||
if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) {
|
||||
allAdded = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!allAdded) break;
|
||||
|
||||
injectionStats.orphans.injected++;
|
||||
if (contextChunk) contextPairsCount++;
|
||||
}
|
||||
|
||||
assembled.orphans.tokens = l1Budget.used;
|
||||
total.used += l1Budget.used;
|
||||
injectionStats.orphans.tokens = l1Budget.used;
|
||||
injectionStats.orphans.l0Count = l0Count;
|
||||
injectionStats.orphans.contextPairs = contextPairsCount;
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// [独立预算] 待整理(未总结范围,独立 5000)
|
||||
// [独立预算] 待整理(未总结范围,独立 5000)- 带上下文配对
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
// 近期范围:(lastSummarized, lastChunkFloor - keepVisible]
|
||||
@@ -583,55 +710,113 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
|
||||
const recentEnd = lastChunkFloor - keepVisible;
|
||||
|
||||
if (chunks.length && recentEnd >= recentStart) {
|
||||
const recentOrphans = chunks
|
||||
const recentOrphanCandidates = chunks
|
||||
.filter(c => !usedChunkIds.has(c.chunkId))
|
||||
.filter(c => c.floor >= recentStart && c.floor <= recentEnd)
|
||||
.filter(c => c.floor >= recentStart && c.floor <= recentEnd);
|
||||
|
||||
// 收集近期范围需要配对的楼层
|
||||
const recentContextFloors = new Set();
|
||||
for (const c of recentOrphanCandidates) {
|
||||
if (c.isL0) continue;
|
||||
const pairFloor = getContextFloor(c);
|
||||
if (pairFloor >= 0) recentContextFloors.add(pairFloor);
|
||||
}
|
||||
|
||||
// 批量获取(复用已有的 or 新获取)
|
||||
let recentContextChunksByFloor = new Map();
|
||||
if (chatId && recentContextFloors.size > 0) {
|
||||
// 过滤掉已经获取过的
|
||||
const newFloors = Array.from(recentContextFloors).filter(f => !contextChunksByFloor.has(f));
|
||||
if (newFloors.length > 0) {
|
||||
try {
|
||||
const newContextChunks = await getChunksByFloors(chatId, newFloors);
|
||||
for (const pc of newContextChunks) {
|
||||
if (!contextChunksByFloor.has(pc.floor)) {
|
||||
contextChunksByFloor.set(pc.floor, []);
|
||||
}
|
||||
contextChunksByFloor.get(pc.floor).push(pc);
|
||||
}
|
||||
} catch (e) {
|
||||
xbLog.warn(MODULE_ID, "获取近期配对chunks失败", e);
|
||||
}
|
||||
}
|
||||
recentContextChunksByFloor = contextChunksByFloor;
|
||||
}
|
||||
|
||||
const recentOrphans = recentOrphanCandidates
|
||||
.sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0)));
|
||||
|
||||
const recentBudget = { used: 0, max: RECENT_ORPHAN_MAX };
|
||||
let recentContextPairsCount = 0;
|
||||
|
||||
for (const c of recentOrphans) {
|
||||
const line = formatChunkFullLine(c);
|
||||
if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) break;
|
||||
// L0 不需要配对
|
||||
if (c.isL0) {
|
||||
const line = formatChunkFullLine(c);
|
||||
if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) break;
|
||||
recentOrphanStats.injected++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// 获取配对chunk
|
||||
const pairFloor = getContextFloor(c);
|
||||
const candidates = recentContextChunksByFloor.get(pairFloor) || [];
|
||||
const contextChunk = pickContextChunk(candidates, c);
|
||||
|
||||
// 格式化(带配对)
|
||||
const formattedLines = formatChunkWithContext(c, contextChunk);
|
||||
|
||||
// 尝试添加所有行
|
||||
let allAdded = true;
|
||||
for (const line of formattedLines) {
|
||||
if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) {
|
||||
allAdded = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!allAdded) break;
|
||||
|
||||
recentOrphanStats.injected++;
|
||||
if (contextChunk) recentContextPairsCount++;
|
||||
}
|
||||
|
||||
assembled.recentOrphans.tokens = recentBudget.used;
|
||||
recentOrphanStats.tokens = recentBudget.used;
|
||||
recentOrphanStats.floorRange = `${recentStart + 1}~${recentEnd + 1}楼`;
|
||||
recentOrphanStats.contextPairs = recentContextPairsCount;
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// ═══════════════════════════════════════════════════════════════════════
|
||||
// 按注入顺序拼接 sections
|
||||
// ═══════════════════════════════════════════════════════════════════════
|
||||
const sections = [];
|
||||
// 1. 世界约束 → 定了的事
|
||||
if (assembled.facts.lines.length) {
|
||||
sections.push(`[定了的事] 已确立的事实\n${assembled.facts.lines.join("\n")}`);
|
||||
}
|
||||
// 2. 核心经历 → 印象深的事
|
||||
if (assembled.events.direct.length) {
|
||||
sections.push(`[印象深的事] 记得很清楚\n\n${assembled.events.direct.join("\n\n")}`);
|
||||
}
|
||||
// 3. 过往背景 → 好像有关的事
|
||||
if (assembled.events.similar.length) {
|
||||
sections.push(`[好像有关的事] 听说过或有点模糊\n\n${assembled.events.similar.join("\n\n")}`);
|
||||
}
|
||||
// 4. 远期片段 → 更早以前
|
||||
if (assembled.orphans.lines.length) {
|
||||
sections.push(`[更早以前] 记忆里残留的老画面\n${assembled.orphans.lines.join("\n")}`);
|
||||
}
|
||||
// 5. 待整理 → 刚发生的
|
||||
if (assembled.recentOrphans.lines.length) {
|
||||
sections.push(`[刚发生的] 还没来得及想明白\n${assembled.recentOrphans.lines.join("\n")}`);
|
||||
}
|
||||
// 6. 人物弧光 → 这些人
|
||||
if (assembled.arcs.lines.length) {
|
||||
sections.push(`[这些人] 他们现在怎样了\n${assembled.arcs.lines.join("\n")}`);
|
||||
}
|
||||
// 按注入顺序拼接 sections
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
const sections = [];
|
||||
// 1. 世界约束 → 定了的事
|
||||
if (assembled.facts.lines.length) {
|
||||
sections.push(`[定了的事] 已确立的事实\n${assembled.facts.lines.join("\n")}`);
|
||||
}
|
||||
// 2. 核心经历 → 印象深的事
|
||||
if (assembled.events.direct.length) {
|
||||
sections.push(`[印象深的事] 记得很清楚\n\n${assembled.events.direct.join("\n\n")}`);
|
||||
}
|
||||
// 3. 过往背景 → 好像有关的事
|
||||
if (assembled.events.similar.length) {
|
||||
sections.push(`[好像有关的事] 听说过或有点模糊\n\n${assembled.events.similar.join("\n\n")}`);
|
||||
}
|
||||
// 4. 远期片段 → 更早以前
|
||||
if (assembled.orphans.lines.length) {
|
||||
sections.push(`[更早以前] 记忆里残留的老画面\n${assembled.orphans.lines.join("\n")}`);
|
||||
}
|
||||
// 5. 待整理 → 刚发生的
|
||||
if (assembled.recentOrphans.lines.length) {
|
||||
sections.push(`[刚发生的] 还没来得及想明白\n${assembled.recentOrphans.lines.join("\n")}`);
|
||||
}
|
||||
// 6. 人物弧光 → 这些人
|
||||
if (assembled.arcs.lines.length) {
|
||||
sections.push(`[这些人] 他们现在怎样了\n${assembled.arcs.lines.join("\n")}`);
|
||||
}
|
||||
|
||||
if (!sections.length) {
|
||||
if (!sections.length) {
|
||||
return { promptText: "", injectionLogText: "", injectionStats };
|
||||
}
|
||||
|
||||
@@ -846,3 +1031,4 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) {
|
||||
|
||||
return { text: finalText, logText: (recallResult.logText || "") + (injectionLogText || "") };
|
||||
}
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ import {
|
||||
fetchOnlineModels,
|
||||
isLocalModelLoaded,
|
||||
DEFAULT_LOCAL_MODEL,
|
||||
} from "./vector/embedder.js";
|
||||
} from "./vector/utils/embedder.js";
|
||||
|
||||
import {
|
||||
getMeta,
|
||||
@@ -66,7 +66,7 @@ import {
|
||||
saveChunks,
|
||||
saveChunkVectors,
|
||||
getStorageStats,
|
||||
} from "./vector/chunk-store.js";
|
||||
} from "./vector/storage/chunk-store.js";
|
||||
|
||||
import {
|
||||
buildIncrementalChunks,
|
||||
@@ -75,12 +75,12 @@ import {
|
||||
syncOnMessageDeleted,
|
||||
syncOnMessageSwiped,
|
||||
syncOnMessageReceived,
|
||||
} from "./vector/chunk-builder.js";
|
||||
import { initStateIntegration, rebuildStateVectors } from "./vector/state-integration.js";
|
||||
import { clearStateVectors, getStateAtomsCount, getStateVectorsCount } from "./vector/state-store.js";
|
||||
} from "./vector/pipeline/chunk-builder.js";
|
||||
import { initStateIntegration, rebuildStateVectors } from "./vector/pipeline/state-integration.js";
|
||||
import { clearStateVectors, getStateAtomsCount, getStateVectorsCount } from "./vector/storage/state-store.js";
|
||||
|
||||
// vector io
|
||||
import { exportVectors, importVectors } from "./vector/vector-io.js";
|
||||
import { exportVectors, importVectors } from "./vector/storage/vector-io.js";
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 常量
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
// 标准 RAG chunking: ~200 tokens per chunk
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getContext } from '../../../../../../extensions.js';
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import {
|
||||
getMeta,
|
||||
updateMeta,
|
||||
@@ -15,10 +15,10 @@ import {
|
||||
makeChunkId,
|
||||
hashText,
|
||||
CHUNK_MAX_TOKENS,
|
||||
} from './chunk-store.js';
|
||||
import { embed, getEngineFingerprint } from './embedder.js';
|
||||
import { xbLog } from '../../../core/debug-core.js';
|
||||
import { filterText } from './text-filter.js';
|
||||
} from '../storage/chunk-store.js';
|
||||
import { embed, getEngineFingerprint } from '../utils/embedder.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import { filterText } from '../utils/text-filter.js';
|
||||
|
||||
const MODULE_ID = 'chunk-builder';
|
||||
|
||||
@@ -339,7 +339,7 @@ export async function syncOnMessageReceived(chatId, lastFloor, message, vectorCo
|
||||
|
||||
// 本地模型未加载时跳过(避免意外触发下载或报错)
|
||||
if (vectorConfig.engine === "local") {
|
||||
const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("./embedder.js");
|
||||
const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("../utils/embedder.js");
|
||||
const modelId = vectorConfig.local?.modelId || DEFAULT_LOCAL_MODEL;
|
||||
if (!isLocalModelLoaded(modelId)) return;
|
||||
}
|
||||
@@ -3,8 +3,8 @@
|
||||
// 事件监听 + 回滚钩子注册
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getContext } from '../../../../../../extensions.js';
|
||||
import { xbLog } from '../../../core/debug-core.js';
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import {
|
||||
saveStateAtoms,
|
||||
saveStateVectors,
|
||||
@@ -12,9 +12,9 @@ import {
|
||||
deleteStateVectorsFromFloor,
|
||||
getStateAtoms,
|
||||
clearStateVectors,
|
||||
} from './state-store.js';
|
||||
import { embed, getEngineFingerprint } from './embedder.js';
|
||||
import { getVectorConfig } from '../data/config.js';
|
||||
} from '../storage/state-store.js';
|
||||
import { embed, getEngineFingerprint } from '../utils/embedder.js';
|
||||
import { getVectorConfig } from '../../data/config.js';
|
||||
|
||||
const MODULE_ID = 'state-integration';
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
// L0 语义锚点召回 + floor bonus + 虚拟 chunk 转换
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getContext } from '../../../../../../extensions.js';
|
||||
import { getAllStateVectors, getStateAtoms } from './state-store.js';
|
||||
import { getMeta } from './chunk-store.js';
|
||||
import { getEngineFingerprint } from './embedder.js';
|
||||
import { xbLog } from '../../../core/debug-core.js';
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js';
|
||||
import { getMeta } from '../storage/chunk-store.js';
|
||||
import { getEngineFingerprint } from '../utils/embedder.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
|
||||
const MODULE_ID = 'state-recall';
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Story Summary - Recall Engine
|
||||
// Story Summary - Recall Engine
|
||||
// L1 chunk + L2 event 召回
|
||||
// - 全量向量打分
|
||||
// - 实体权重归一化分配
|
||||
@@ -8,19 +8,19 @@
|
||||
// - MMR 去重(融合后执行)
|
||||
// - floor 稀疏去重
|
||||
|
||||
import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js';
|
||||
import { embed, getEngineFingerprint } from './embedder.js';
|
||||
import { xbLog } from '../../../core/debug-core.js';
|
||||
import { getContext } from '../../../../../../extensions.js';
|
||||
import { getSummaryStore, getFacts, getNewCharacters, isRelationFact } from '../data/store.js';
|
||||
import { filterText } from './text-filter.js';
|
||||
import { getAllChunks, getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from '../storage/chunk-store.js';
|
||||
import { embed, getEngineFingerprint } from '../utils/embedder.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { getSummaryStore, getFacts, getNewCharacters, isRelationFact } from '../../data/store.js';
|
||||
import { filterText } from '../utils/text-filter.js';
|
||||
import {
|
||||
searchStateAtoms,
|
||||
buildL0FloorBonus,
|
||||
stateToVirtualChunks,
|
||||
mergeAndSparsify,
|
||||
} from './state-recall.js';
|
||||
import { ensureEventTextIndex, searchEventsByText } from './text-search.js';
|
||||
} from '../pipeline/state-recall.js';
|
||||
import { ensureEventTextIndex, searchEventsByText, ensureChunkTextIndex, searchChunksByText } from './text-search.js';
|
||||
import {
|
||||
extractRareTerms,
|
||||
extractNounsFromFactsO,
|
||||
@@ -29,10 +29,8 @@ import {
|
||||
const MODULE_ID = 'recall';
|
||||
|
||||
const CONFIG = {
|
||||
QUERY_MSG_COUNT: 5,
|
||||
QUERY_DECAY_BETA: 0.7,
|
||||
QUERY_MAX_CHARS: 600,
|
||||
QUERY_CONTEXT_CHARS: 240,
|
||||
QUERY_MSG_COUNT: 3,
|
||||
QUERY_DECAY_BETA: 0.6,
|
||||
|
||||
CAUSAL_CHAIN_MAX_DEPTH: 10,
|
||||
CAUSAL_INJECT_MAX: 30,
|
||||
@@ -216,11 +214,26 @@ function extractRelationTarget(p) {
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
 * Build normalized per-segment weights for the weighted query embedding.
 *
 * Each segment's raw weight is the product of two factors:
 *  - a positional factor `exp(beta * (i - last))`: the last segment gets 1,
 *    earlier segments decay exponentially with their distance from the end;
 *  - a content factor that damps short segments: a segment with fewer than
 *    15 non-whitespace characters is scaled by `sqrt(len / 15)`, floored at 0.3.
 * The raw weights are then divided by their sum so the result adds up to 1.
 *
 * @param {string[]} segments - Query segments; a missing value yields `[]`.
 * @param {number} [beta=0.6] - Positional decay rate.
 * @returns {number[]} One weight per segment (empty for no segments, `[1]` for one).
 */
function buildContentAwareWeights(segments, beta = 0.6) {
    // A null/undefined segment list is treated like an empty one instead of throwing.
    const n = segments?.length ?? 0;
    if (n === 0) return [];
    if (n === 1) return [1.0];

    const SHORT_THRESHOLD = 15;
    const MIN_CONTENT_FACTOR = 0.3;
    const last = n - 1;

    const raw = Array.from({ length: n }, (_, i) => {
        const posWeight = Math.exp(beta * (i - last));
        // Whitespace does not count towards the segment's content length.
        const len = String(segments[i] || '').replace(/\s+/g, '').length;
        const contentFactor = len >= SHORT_THRESHOLD
            ? 1.0
            : Math.max(MIN_CONTENT_FACTOR, Math.sqrt(len / SHORT_THRESHOLD));
        return posWeight * contentFactor;
    });

    // `|| 1` avoids dividing by zero if every raw weight underflows to 0.
    const sum = raw.reduce((a, b) => a + b, 0) || 1;
    return raw.map(w => w / sum);
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
@@ -247,19 +260,16 @@ function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = nul
|
||||
}
|
||||
}
|
||||
|
||||
return messages.slice(-count).map((m, idx, arr) => {
|
||||
const speaker = m.name || (m.is_user ? (name1 || "用户") : "角色");
|
||||
const clean = cleanForRecall(m.mes);
|
||||
if (!clean) return '';
|
||||
const limit = idx === arr.length - 1 ? CONFIG.QUERY_MAX_CHARS : CONFIG.QUERY_CONTEXT_CHARS;
|
||||
return `${speaker}: ${clean.slice(0, limit)}`;
|
||||
}).filter(Boolean);
|
||||
return messages.slice(-count)
|
||||
.map((m) => cleanForRecall(m.mes) || '')
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
async function embedWeightedQuery(segments, vectorConfig) {
|
||||
if (!segments?.length) return null;
|
||||
|
||||
const weights = buildExpDecayWeights(segments.length, CONFIG.QUERY_DECAY_BETA);
|
||||
const weights = buildContentAwareWeights(segments, CONFIG.QUERY_DECAY_BETA);
|
||||
|
||||
const vecs = await embed(segments, vectorConfig);
|
||||
const dims = vecs?.[0]?.length || 0;
|
||||
if (!dims) return null;
|
||||
@@ -377,19 +387,6 @@ function expandByFacts(presentEntities, facts, maxDepth = 2) {
|
||||
// 实体权重归一化(用于加分分配)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * Normalize entity weights so that they sum to 1.
 *
 * @param {Map<string, number>} queryEntityWeights - Raw weight per entity.
 * @returns {Map<string, number>} A new Map with each weight divided by the
 *   total. Empty when the input is missing/empty or the total is not a
 *   positive number.
 */
function normalizeEntityWeights(queryEntityWeights) {
    if (!queryEntityWeights?.size) return new Map();

    let total = 0;
    for (const weight of queryEntityWeights.values()) {
        total += weight;
    }
    // `!(total > 0)` also rejects NaN, which `total <= 0` would let through.
    if (!(total > 0)) return new Map();

    return new Map(
        Array.from(queryEntityWeights, ([entity, weight]) => [entity, weight / total]),
    );
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 文本路 Query 构建(分层高信号词)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
@@ -548,7 +545,167 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
|
||||
// L2 Events 检索(RRF 混合 + MMR 后置) / L1 Chunks 检索
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1) {
|
||||
/**
 * L2 event recall: hybrid vector + text retrieval fused by RRF, then MMR.
 *
 * Pipeline:
 *  1. Bail out when there is no chat, no query vector, the stored engine
 *     fingerprint differs from the current one, or no event vectors exist.
 *  2. Refresh the event text index and run the text-route search.
 *  3. Score every event by cosine similarity plus an L0 floor bonus (the bonus
 *     of the first floor inside the event's floor range that has one).
 *  4. Keep events above CONFIG.MIN_SIMILARITY_EVENT, fuse with the text
 *     ranking via fuseEventsByRRF, drop TEXT-only hits whose raw similarity is
 *     below CONFIG.TEXT_SOFT_MIN_SIM, run mmrSelect, and finally cap TEXT-only
 *     hits at CONFIG.TEXT_TOTAL_MAX.
 *
 * Diagnostic statistics are attached to the first result as
 * `_preFilterDistribution`, `_rrfStats`, `_textOnlyStats`, `_textGapInfo`.
 *
 * @param {number[]} queryVector - Embedded query.
 * @param {string} queryTextForSearch - Query string for the text route.
 * @param {Array<object>} allEvents - All summary events; nullish is treated as empty.
 * @param {object} vectorConfig - Passed to getEngineFingerprint.
 * @param {object} store - Summary store; only `updatedAt` is read here.
 * @param {Set<string>} queryEntitySet - Normalized entity names found in the query.
 * @param {Map<number, number>} [l0FloorBonus] - Bonus per floor from L0 recall.
 * @returns {Promise<Array<object>>} Selected events with recall metadata.
 */
async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntitySet, l0FloorBonus = new Map()) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length) return [];

    // Normalize once so every later use agrees on the same (possibly empty) array.
    const events = allEvents || [];

    const meta = await getMeta(chatId);
    const fp = getEngineFingerprint(vectorConfig);
    // Vectors built by a different engine are not comparable with this query vector.
    if (meta?.fingerprint && meta.fingerprint !== fp) return [];

    const eventVectors = await getAllEventVectors(chatId);
    const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
    if (!vectorMap.size) return [];

    // Build / refresh the text index
    const revision = `${chatId}:${store?.updatedAt || 0}:${events.length}`;
    ensureEventTextIndex(events, revision);

    // Text route
    const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
    const textGapInfo = textRanked._gapInfo || null;

    // Vector route
    const scored = events.map((event, idx) => {
        const v = vectorMap.get(event.id);
        const rawSim = v ? cosineSimilarity(queryVector, v) : 0;

        let bonus = 0;

        // L0 bonus: only the first floor in the range that carries a bonus counts.
        const range = parseFloorRange(event.summary);
        if (range) {
            for (let f = range.start; f <= range.end; f++) {
                if (l0FloorBonus.has(f)) {
                    bonus += l0FloorBonus.get(f);
                    break;
                }
            }
        }

        const participants = (event.participants || []).map(p => normalize(p));
        const hasPresent = participants.some(p => queryEntitySet.has(p));

        return {
            _id: event.id,
            _idx: idx,
            event,
            rawSim,
            finalScore: rawSim + bonus,
            vector: v,
            _hasPresent: hasPresent,
        };
    });

    const rawSimById = new Map(scored.map(s => [s._id, s.rawSim]));
    const hasPresentById = new Map(scored.map(s => [s._id, s._hasPresent]));

    const preFilterDistribution = {
        total: scored.length,
        '0.85+': scored.filter(s => s.finalScore >= 0.85).length,
        '0.7-0.85': scored.filter(s => s.finalScore >= 0.7 && s.finalScore < 0.85).length,
        '0.6-0.7': scored.filter(s => s.finalScore >= 0.6 && s.finalScore < 0.7).length,
        '0.5-0.6': scored.filter(s => s.finalScore >= 0.5 && s.finalScore < 0.6).length,
        '<0.5': scored.filter(s => s.finalScore < 0.5).length,
        passThreshold: scored.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT).length,
        threshold: CONFIG.MIN_SIMILARITY_EVENT,
    };

    const candidates = scored
        .filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
        .sort((a, b) => b.finalScore - a.finalScore)
        .slice(0, CONFIG.CANDIDATE_EVENTS);

    const vectorRanked = candidates.map(s => ({
        event: s.event,
        similarity: s.finalScore,
        rawSim: s.rawSim,
        vector: s.vector,
    }));

    const eventById = new Map(events.map(e => [e.id, e]));
    const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);

    // Quality gate for TEXT-only hits
    const textOnlyStats = {
        total: 0,
        passedSoftCheck: 0,
        filtered: 0,
    };

    const filtered = fused.filter(x => {
        if (x.type !== 'TEXT') return true;

        textOnlyStats.total++;

        const sim = x.rawSim || rawSimById.get(x.id) || 0;
        if (sim >= CONFIG.TEXT_SOFT_MIN_SIM) {
            textOnlyStats.passedSoftCheck++;
            return true;
        }

        textOnlyStats.filtered++;
        return false;
    });

    const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
        ...x,
        _id: x.id,
    }));

    const mmrOutput = mmrSelect(
        mmrInput,
        CONFIG.MAX_EVENTS,
        CONFIG.MMR_LAMBDA,
        c => c.vector || null,
        c => c.rrf
    );

    // Cap on TEXT-only hits (applied after MMR)
    let textOnlyCount = 0;
    let textOnlyTruncated = 0;

    const finalResults = mmrOutput.filter(x => {
        if (x.type !== 'TEXT') return true;

        if (textOnlyCount < CONFIG.TEXT_TOTAL_MAX) {
            textOnlyCount++;
            return true;
        }

        textOnlyTruncated++;
        return false;
    });

    textOnlyStats.finalIncluded = textOnlyCount;
    textOnlyStats.truncatedByLimit = textOnlyTruncated;

    const results = finalResults.map(x => ({
        event: x.event,
        similarity: x.rrf,
        _recallType: hasPresentById.get(x.event?.id) ? 'DIRECT' : 'SIMILAR',
        _recallReason: x.type,
        _rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
        _rawSim: rawSimById.get(x.event?.id) || 0,
    }));

    // Statistics ride along on the first result
    if (results.length > 0) {
        results[0]._preFilterDistribution = preFilterDistribution;
        results[0]._rrfStats = {
            vectorCount: vectorRanked.length,
            textCount: textRanked.length,
            hybridCount: fused.filter(x => x.type === 'HYBRID').length,
            vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
            textOnlyTotal: textOnlyStats.total,
        };
        results[0]._textOnlyStats = textOnlyStats;
        results[0]._textGapInfo = textGapInfo;
    }

    return results;
}
|
||||
|
||||
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1, textSearchParams = null) {
|
||||
const { chatId } = getContext();
|
||||
if (!chatId || !queryVector?.length) return [];
|
||||
|
||||
@@ -577,6 +734,58 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
|
||||
};
|
||||
});
|
||||
|
||||
// 文本路补充(仅待整理区)
|
||||
let textL1Stats = null;
|
||||
const store = getSummaryStore();
|
||||
const keepVisible = store?.keepVisibleCount ?? 3;
|
||||
const recentStart = lastSummarizedFloor + 1;
|
||||
const recentEnd = (meta?.lastChunkFloor ?? -1) - keepVisible;
|
||||
|
||||
if (textSearchParams && recentEnd >= recentStart && recentEnd >= 0) {
|
||||
const { queryEntities, rareTerms } = textSearchParams;
|
||||
const textQuery = [...(queryEntities || []), ...(rareTerms || [])].join(' ');
|
||||
|
||||
if (textQuery.trim()) {
|
||||
const allChunks = await getAllChunks(chatId);
|
||||
const recentChunks = allChunks.filter(c => c.floor >= recentStart && c.floor <= recentEnd);
|
||||
|
||||
if (recentChunks.length > 0) {
|
||||
const revision = `${chatId}:chunk:${recentEnd}`;
|
||||
ensureChunkTextIndex(recentChunks, revision);
|
||||
|
||||
const textHits = searchChunksByText(textQuery, recentStart, recentEnd, 20);
|
||||
|
||||
textL1Stats = {
|
||||
range: `${recentStart + 1}~${recentEnd + 1}`,
|
||||
candidates: recentChunks.length,
|
||||
hits: textHits.length,
|
||||
};
|
||||
|
||||
for (const hit of textHits) {
|
||||
const existingIdx = scored.findIndex(s => s.chunkId === hit.chunkId);
|
||||
|
||||
if (existingIdx >= 0) {
|
||||
scored[existingIdx]._hasTextHit = true;
|
||||
scored[existingIdx]._textRank = hit.textRank;
|
||||
} else {
|
||||
scored.push({
|
||||
_id: hit.chunkId,
|
||||
chunkId: hit.chunkId,
|
||||
floor: hit.floor,
|
||||
chunkIdx: 0,
|
||||
similarity: CONFIG.MIN_SIMILARITY_CHUNK_RECENT,
|
||||
_baseSimilarity: 0,
|
||||
_l0Bonus: 0,
|
||||
_recallReason: 'TEXT_L1',
|
||||
_textRank: hit.textRank,
|
||||
vector: null,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const candidates = scored
|
||||
.filter(s => {
|
||||
const threshold = s.floor > lastSummarizedFloor
|
||||
@@ -599,6 +808,7 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
|
||||
'0.55-0.6': scored.filter(s => s.similarity >= 0.55 && s.similarity < 0.6).length,
|
||||
'<0.55': scored.filter(s => s.similarity < 0.55).length,
|
||||
},
|
||||
textL1: textL1Stats,
|
||||
};
|
||||
|
||||
const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length);
|
||||
@@ -636,6 +846,8 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
|
||||
isUser: chunk.isUser,
|
||||
text: chunk.text,
|
||||
similarity: item.similarity,
|
||||
_recallReason: item._recallReason,
|
||||
_textRank: item._textRank,
|
||||
};
|
||||
}).filter(Boolean);
|
||||
|
||||
@@ -646,184 +858,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
|
||||
return results;
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// L2 Events 检索(RRF 混合 + MMR 后置)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * L2 event recall (variant with entity bonus): hybrid vector + text retrieval
 * fused by RRF, then MMR.
 *
 * Every event is scored by cosine similarity plus
 *  - an L0 floor bonus (the bonus of the first floor inside the event's floor
 *    range that has one), and
 *  - an entity bonus: ENTITY_BONUS_POOL (0.10) times the largest normalized
 *    weight among the event's participants.
 * Events above CONFIG.MIN_SIMILARITY_EVENT are fused with the text ranking via
 * fuseEventsByRRF; TEXT-only hits below CONFIG.TEXT_SOFT_MIN_SIM are dropped,
 * mmrSelect picks the final set, and TEXT-only hits are capped at
 * CONFIG.TEXT_TOTAL_MAX.
 *
 * Diagnostic statistics are attached to the first result as
 * `_preFilterDistribution`, `_rrfStats`, `_textOnlyStats`, `_textGapInfo`.
 *
 * @param {number[]} queryVector - Embedded query.
 * @param {string} queryTextForSearch - Query string for the text route.
 * @param {Array<object>} allEvents - All summary events; nullish is treated as empty.
 * @param {object} vectorConfig - Passed to getEngineFingerprint.
 * @param {object} store - Summary store; only `updatedAt` is read here.
 * @param {Map<string, number>} normalizedEntityWeights - Normalized weight per entity.
 * @param {Map<number, number>} [l0FloorBonus] - Bonus per floor from L0 recall.
 * @returns {Promise<Array<object>>} Selected events with recall metadata.
 */
async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, normalizedEntityWeights, l0FloorBonus = new Map()) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length) return [];

    // Normalize once so every later use agrees on the same (possibly empty) array.
    const events = allEvents || [];

    const meta = await getMeta(chatId);
    const fp = getEngineFingerprint(vectorConfig);
    // Vectors built by a different engine are not comparable with this query vector.
    if (meta?.fingerprint && meta.fingerprint !== fp) return [];

    const eventVectors = await getAllEventVectors(chatId);
    const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
    if (!vectorMap.size) return [];

    // Build / refresh the text index
    const revision = `${chatId}:${store?.updatedAt || 0}:${events.length}`;
    ensureEventTextIndex(events, revision);

    // Text route
    const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
    const textGapInfo = textRanked._gapInfo || null;

    // Vector route: cosine similarity + L0 bonus + entity bonus
    const ENTITY_BONUS_POOL = 0.10;

    const scored = events.map((event, idx) => {
        const v = vectorMap.get(event.id);
        const rawSim = v ? cosineSimilarity(queryVector, v) : 0;

        let bonus = 0;

        // L0 bonus: only the first floor in the range that carries a bonus counts.
        const range = parseFloorRange(event.summary);
        if (range) {
            for (let f = range.start; f <= range.end; f++) {
                if (l0FloorBonus.has(f)) {
                    bonus += l0FloorBonus.get(f);
                    break;
                }
            }
        }

        // Entity bonus is driven by the single heaviest participant.
        const participants = (event.participants || []).map(p => normalize(p));
        let maxEntityWeight = 0;
        for (const p of participants) {
            const w = normalizedEntityWeights.get(p) || 0;
            if (w > maxEntityWeight) {
                maxEntityWeight = w;
            }
        }
        const entityBonus = ENTITY_BONUS_POOL * maxEntityWeight;
        bonus += entityBonus;

        return {
            _id: event.id,
            _idx: idx,
            event,
            rawSim,
            finalScore: rawSim + bonus,
            vector: v,
            _entityBonus: entityBonus,
            _hasPresent: maxEntityWeight > 0,
        };
    });

    const rawSimById = new Map(scored.map(s => [s._id, s.rawSim]));
    const entityBonusById = new Map(scored.map(s => [s._id, s._entityBonus]));
    const hasPresentById = new Map(scored.map(s => [s._id, s._hasPresent]));

    const preFilterDistribution = {
        total: scored.length,
        '0.85+': scored.filter(s => s.finalScore >= 0.85).length,
        '0.7-0.85': scored.filter(s => s.finalScore >= 0.7 && s.finalScore < 0.85).length,
        '0.6-0.7': scored.filter(s => s.finalScore >= 0.6 && s.finalScore < 0.7).length,
        '0.5-0.6': scored.filter(s => s.finalScore >= 0.5 && s.finalScore < 0.6).length,
        '<0.5': scored.filter(s => s.finalScore < 0.5).length,
        passThreshold: scored.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT).length,
        threshold: CONFIG.MIN_SIMILARITY_EVENT,
    };

    const candidates = scored
        .filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
        .sort((a, b) => b.finalScore - a.finalScore)
        .slice(0, CONFIG.CANDIDATE_EVENTS);

    const vectorRanked = candidates.map(s => ({
        event: s.event,
        similarity: s.finalScore,
        rawSim: s.rawSim,
        vector: s.vector,
    }));

    const eventById = new Map(events.map(e => [e.id, e]));
    const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);

    const textOnlyStats = {
        total: 0,
        passedSoftCheck: 0,
        filtered: 0,
        finalIncluded: 0,
        truncatedByLimit: 0,
    };

    // Quality gate for TEXT-only hits
    const filtered = fused.filter(x => {
        if (x.type !== 'TEXT') return true;

        textOnlyStats.total++;
        const sim = x.rawSim || rawSimById.get(x.id) || 0;
        if (sim >= CONFIG.TEXT_SOFT_MIN_SIM) {
            textOnlyStats.passedSoftCheck++;
            return true;
        }

        textOnlyStats.filtered++;
        return false;
    });

    const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
        ...x,
        _id: x.id,
    }));

    const mmrOutput = mmrSelect(
        mmrInput,
        CONFIG.MAX_EVENTS,
        CONFIG.MMR_LAMBDA,
        c => c.vector || null,
        c => c.rrf
    );

    // Cap on TEXT-only hits (applied after MMR)
    let textOnlyCount = 0;
    const finalResults = mmrOutput.filter(x => {
        if (x.type !== 'TEXT') return true;
        if (textOnlyCount < CONFIG.TEXT_TOTAL_MAX) {
            textOnlyCount++;
            return true;
        }
        textOnlyStats.truncatedByLimit++;
        return false;
    });
    textOnlyStats.finalIncluded = textOnlyCount;

    const results = finalResults.map(x => ({
        event: x.event,
        similarity: x.rrf,
        _recallType: hasPresentById.get(x.event?.id) ? 'DIRECT' : 'SIMILAR',
        _recallReason: x.type,
        _rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
        _entityBonus: entityBonusById.get(x.event?.id) || 0,
        _rawSim: rawSimById.get(x.event?.id) || 0,
    }));

    // Statistics ride along on the first result
    if (results.length > 0) {
        results[0]._preFilterDistribution = preFilterDistribution;
        results[0]._rrfStats = {
            vectorCount: vectorRanked.length,
            textCount: textRanked.length,
            hybridCount: fused.filter(x => x.type === 'HYBRID').length,
            vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
            textOnlyTotal: textOnlyStats.total,
        };
        results[0]._textOnlyStats = textOnlyStats;
        results[0]._textGapInfo = textGapInfo;
    }

    return results;
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 日志
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
function formatRecallLog({
|
||||
elapsed,
|
||||
segments,
|
||||
@@ -831,7 +865,7 @@ function formatRecallLog({
|
||||
chunkResults,
|
||||
eventResults,
|
||||
allEvents,
|
||||
normalizedEntityWeights = new Map(),
|
||||
queryEntities = [],
|
||||
causalEvents = [],
|
||||
chunkPreFilterStats = null,
|
||||
l0Results = [],
|
||||
@@ -840,15 +874,15 @@ function formatRecallLog({
|
||||
textQueryBreakdown = null,
|
||||
}) {
|
||||
const lines = [
|
||||
'\u2554' + '\u2550'.repeat(62) + '\u2557',
|
||||
'\u2551 记忆召回报告 \u2551',
|
||||
'\u2560' + '\u2550'.repeat(62) + '\u2563',
|
||||
`\u2551 耗时: ${elapsed}ms`,
|
||||
'\u255a' + '\u2550'.repeat(62) + '\u255d',
|
||||
'╔' + '═'.repeat(62) + '╗',
|
||||
'║ 记忆召回报告 ║',
|
||||
'╠' + '═'.repeat(62) + '╣',
|
||||
`║ 耗时: ${elapsed}ms`,
|
||||
'╚' + '═'.repeat(62) + '╝',
|
||||
'',
|
||||
'\u250c' + '\u2500'.repeat(61) + '\u2510',
|
||||
'\u2502 【查询构建】最近 5 条消息,指数衰减加权 (β=0.7) \u2502',
|
||||
'\u2514' + '\u2500'.repeat(61) + '\u2518',
|
||||
'┌' + '─'.repeat(61) + '┐',
|
||||
`│ 【查询构建】最近 ${CONFIG.QUERY_MSG_COUNT} 条,内容感知加权 (β=${CONFIG.QUERY_DECAY_BETA}) │`,
|
||||
'└' + '─'.repeat(61) + '┘',
|
||||
];
|
||||
|
||||
const segmentsSorted = segments.map((s, i) => ({
|
||||
@@ -858,25 +892,19 @@ function formatRecallLog({
|
||||
})).sort((a, b) => b.weight - a.weight);
|
||||
|
||||
segmentsSorted.forEach((s, rank) => {
|
||||
const bar = '\u2588'.repeat(Math.round(s.weight * 20));
|
||||
const bar = '█'.repeat(Math.round(s.weight * 20));
|
||||
const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text;
|
||||
const marker = rank === 0 ? ' ◀ 主导' : '';
|
||||
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
|
||||
});
|
||||
|
||||
lines.push('');
|
||||
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
|
||||
lines.push('\u2502 【提取实体】 \u2502');
|
||||
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
|
||||
lines.push('┌' + '─'.repeat(61) + '┐');
|
||||
lines.push('│ 【提取实体】 │');
|
||||
lines.push('└' + '─'.repeat(61) + '┘');
|
||||
|
||||
if (normalizedEntityWeights?.size) {
|
||||
const sorted = Array.from(normalizedEntityWeights.entries())
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.slice(0, 8);
|
||||
const formatted = sorted
|
||||
.map(([e, w]) => `${e}(${(w * 100).toFixed(0)}%)`)
|
||||
.join(' | ');
|
||||
lines.push(` ${formatted}`);
|
||||
if (queryEntities?.length) {
|
||||
lines.push(` 焦点: ${queryEntities.slice(0, 8).join('、')}${queryEntities.length > 8 ? ' ...' : ''}`);
|
||||
} else {
|
||||
lines.push(' (无)');
|
||||
}
|
||||
@@ -885,9 +913,9 @@ function formatRecallLog({
|
||||
}
|
||||
|
||||
lines.push('');
|
||||
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
|
||||
lines.push('\u2502 【文本路 Query 构成】 \u2502');
|
||||
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
|
||||
lines.push('┌' + '─'.repeat(61) + '┐');
|
||||
lines.push('│ 【文本路 Query 构成】 │');
|
||||
lines.push('└' + '─'.repeat(61) + '┘');
|
||||
|
||||
if (textQueryBreakdown) {
|
||||
const bd = textQueryBreakdown;
|
||||
@@ -919,23 +947,9 @@ function formatRecallLog({
|
||||
}
|
||||
|
||||
lines.push('');
|
||||
lines.push(' 实体归一化(用于加分):');
|
||||
if (normalizedEntityWeights?.size) {
|
||||
const sorted = Array.from(normalizedEntityWeights.entries())
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.slice(0, 8);
|
||||
const formatted = sorted
|
||||
.map(([e, w]) => `${e}(${(w * 100).toFixed(0)}%)`)
|
||||
.join(' | ');
|
||||
lines.push(` ${formatted}`);
|
||||
} else {
|
||||
lines.push(' (无)');
|
||||
}
|
||||
|
||||
lines.push('');
|
||||
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
|
||||
lines.push('\u2502 【召回统计】 \u2502');
|
||||
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
|
||||
lines.push('┌' + '─'.repeat(61) + '┐');
|
||||
lines.push('│ 【召回统计】 │');
|
||||
lines.push('└' + '─'.repeat(61) + '┘');
|
||||
|
||||
// L0
|
||||
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
|
||||
@@ -953,6 +967,11 @@ function formatRecallLog({
|
||||
const dist = chunkPreFilterStats.distribution || {};
|
||||
lines.push(` 全量: ${chunkPreFilterStats.total} 条 | 通过阈值(远期≥${chunkPreFilterStats.thresholdRemote}, 待整理≥${chunkPreFilterStats.thresholdRecent}): ${chunkPreFilterStats.passThreshold} 条 | 最终: ${chunkResults.length} 条`);
|
||||
lines.push(` 匹配度: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0}`);
|
||||
|
||||
const textL1 = chunkPreFilterStats.textL1;
|
||||
if (textL1) {
|
||||
lines.push(` 文本路补充(待整理区): 范围 ${textL1.range}楼 | 候选 ${textL1.candidates} 条 | 命中 ${textL1.hits} 条`);
|
||||
}
|
||||
} else {
|
||||
lines.push(` 选入: ${chunkResults.length} 条`);
|
||||
}
|
||||
@@ -988,9 +1007,6 @@ function formatRecallLog({
|
||||
lines.push(` ${i + 1}. [${id}] ${title.padEnd(25)} sim=${sim} tRank=${tRank}`);
|
||||
});
|
||||
}
|
||||
const entityBoostedEvents = eventResults.filter(e => e._entityBonus > 0).length;
|
||||
lines.push('');
|
||||
lines.push(` 实体加分事件: ${entityBoostedEvents} 条`);
|
||||
|
||||
if (textGapInfo) {
|
||||
lines.push('');
|
||||
@@ -1002,7 +1018,6 @@ function formatRecallLog({
|
||||
}
|
||||
}
|
||||
|
||||
// Causal
|
||||
if (causalEvents.length) {
|
||||
const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0));
|
||||
const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0));
|
||||
@@ -1012,13 +1027,8 @@ function formatRecallLog({
|
||||
}
|
||||
|
||||
lines.push('');
|
||||
return lines.join('\n');
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 主入口
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) {
|
||||
const T0 = performance.now();
|
||||
const { chat } = getContext();
|
||||
@@ -1049,9 +1059,9 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
||||
const lexicon = buildEntityLexicon(store, allEvents);
|
||||
const queryEntityWeights = extractEntitiesWithWeights(segments, weights, lexicon);
|
||||
const queryEntities = Array.from(queryEntityWeights.keys());
|
||||
const queryEntitySet = new Set(queryEntities.map(normalize));
|
||||
const facts = getFacts(store);
|
||||
const expandedTerms = expandByFacts(queryEntities, facts, 2);
|
||||
const normalizedEntityWeights = normalizeEntityWeights(queryEntityWeights);
|
||||
|
||||
let queryTextForSearch = '';
|
||||
let textQueryBreakdown = null;
|
||||
@@ -1079,8 +1089,11 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
||||
}
|
||||
|
||||
const [chunkResults, eventResults] = await Promise.all([
|
||||
searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor),
|
||||
searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, normalizedEntityWeights, l0FloorBonus),
|
||||
searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor, {
|
||||
queryEntities,
|
||||
rareTerms: textQueryBreakdown?.rareTerms || [],
|
||||
}),
|
||||
searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntitySet, l0FloorBonus),
|
||||
]);
|
||||
|
||||
const chunkPreFilterStats = chunkResults._preFilterStats || null;
|
||||
@@ -1118,7 +1131,7 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
||||
chunkResults: mergedChunks,
|
||||
eventResults,
|
||||
allEvents,
|
||||
normalizedEntityWeights,
|
||||
queryEntities,
|
||||
causalEvents: causalEventsTruncated,
|
||||
chunkPreFilterStats,
|
||||
l0Results,
|
||||
@@ -1149,3 +1162,8 @@ export function buildQueryText(chat, count = 2, excludeLastAi = false) {
|
||||
return `${speaker}: ${text.slice(0, 500)}`;
|
||||
}).filter(Boolean).join('\n');
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// text-search.js - 最终版
|
||||
|
||||
import MiniSearch from '../../../libs/minisearch.mjs';
|
||||
import MiniSearch from '../../../../libs/minisearch.mjs';
|
||||
|
||||
const STOP_WORDS = new Set([
|
||||
'的', '了', '是', '在', '和', '与', '或', '但', '而', '却',
|
||||
@@ -106,7 +106,7 @@ export function ensureEventTextIndex(events, revision) {
|
||||
*
|
||||
* 参考:帕累托法则(80/20 法则)在信息检索中的应用
|
||||
*/
|
||||
function dynamicTopK(scores, coverage = 0.90, minK = 15, maxK = 80) {
|
||||
export function dynamicTopK(scores, coverage = 0.90, minK = 15, maxK = 80) {
|
||||
if (!scores.length) return 0;
|
||||
|
||||
const total = scores.reduce((a, b) => a + b, 0);
|
||||
@@ -171,3 +171,67 @@ export function clearEventTextIndex() {
|
||||
idx = null;
|
||||
lastRevision = null;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Chunk 文本索引(待整理区 L1 补充)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
let chunkIdx = null;
|
||||
let chunkIdxRevision = null;
|
||||
|
||||
/**
 * Build the MiniSearch full-text index over chunk text, or reuse the current
 * one when it was built for the same revision.
 *
 * The index searches the `text` field and stores `chunkId` and `floor` so
 * search hits can be mapped back to chunks. On failure the error is logged
 * and the index is left cleared, so later searches return no hits.
 *
 * @param {Array<{chunkId: string, floor: number, text?: string}>} chunks -
 *   Chunks to index; nullish is treated as empty.
 * @param {string} revision - Cache key; the index is rebuilt when it changes.
 * @returns {void}
 */
export function ensureChunkTextIndex(chunks, revision) {
    if (chunkIdx && revision === chunkIdxRevision) return;

    try {
        chunkIdx = new MiniSearch({
            fields: ['text'],
            storeFields: ['chunkId', 'floor'],
            tokenize,
            searchOptions: { tokenize },
        });

        chunkIdx.addAll((chunks || []).map(c => ({
            id: c.chunkId,
            chunkId: c.chunkId,
            floor: c.floor,
            text: c.text || '',
        })));

        chunkIdxRevision = revision;
    } catch (e) {
        console.error('[text-search] Chunk index build failed:', e);
        // Drop both the index and its revision so no stale cache key survives a failed build.
        chunkIdx = null;
        chunkIdxRevision = null;
    }
}
|
||||
|
||||
/**
 * Search the chunk text index and return the top hits inside a floor range.
 *
 * Runs an exact (no fuzzy, no prefix) search, keeps hits whose `floor` lies in
 * `[floorMin, floorMax]`, and cuts the list with
 * `dynamicTopK(scores, 0.85, 5, limit)`.
 *
 * @param {string} query - Search text; a blank or non-string query yields `[]`.
 * @param {number} floorMin - Lowest floor to keep (inclusive).
 * @param {number} floorMax - Highest floor to keep (inclusive).
 * @param {number} [limit=20] - Passed to dynamicTopK as its upper bound.
 * @returns {Array<{chunkId: string, floor: number, textRank: number, score: number}>}
 *   Hits in search order with a 1-based `textRank`; `[]` when the index is
 *   missing, nothing matches, or the search throws.
 */
export function searchChunksByText(query, floorMin, floorMax, limit = 20) {
    // A non-string query would make `.trim()` throw outside the try block below.
    if (!chunkIdx || typeof query !== 'string' || !query.trim()) return [];

    try {
        const hits = chunkIdx
            .search(query, { fuzzy: false, prefix: false })
            .filter(r => r.floor >= floorMin && r.floor <= floorMax);
        if (!hits.length) return [];

        const k = dynamicTopK(hits.map(r => r.score), 0.85, 5, limit);

        return hits.slice(0, k).map((r, i) => ({
            chunkId: r.chunkId,
            floor: r.floor,
            textRank: i + 1,
            score: r.score,
        }));
    } catch (e) {
        console.error('[text-search] Chunk search failed:', e);
        return [];
    }
}
|
||||
|
||||
/**
 * Drop the cached chunk text index and its revision so the next
 * ensureChunkTextIndex call rebuilds it.
 *
 * @returns {void}
 */
export function clearChunkTextIndex() {
    chunkIdx = null;
    chunkIdxRevision = null;
}
|
||||
@@ -1,5 +1,5 @@
|
||||
import { xbLog } from '../../../core/debug-core.js';
|
||||
import { extensionFolderPath } from '../../../core/constants.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import { extensionFolderPath } from '../../../../core/constants.js';
|
||||
|
||||
const MODULE_ID = 'tokenizer';
|
||||
|
||||
@@ -8,7 +8,7 @@ import {
|
||||
chunkVectorsTable,
|
||||
eventVectorsTable,
|
||||
CHUNK_MAX_TOKENS,
|
||||
} from '../data/db.js';
|
||||
} from '../../data/db.js';
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 工具函数
|
||||
@@ -4,11 +4,11 @@
|
||||
// StateVector 存 IndexedDB(可重建)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { saveMetadataDebounced } from '../../../../../../extensions.js';
|
||||
import { chat_metadata } from '../../../../../../../script.js';
|
||||
import { stateVectorsTable } from '../data/db.js';
|
||||
import { EXT_ID } from '../../../core/constants.js';
|
||||
import { xbLog } from '../../../core/debug-core.js';
|
||||
import { saveMetadataDebounced } from '../../../../../../../extensions.js';
|
||||
import { chat_metadata } from '../../../../../../../../script.js';
|
||||
import { stateVectorsTable } from '../../data/db.js';
|
||||
import { EXT_ID } from '../../../../core/constants.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
|
||||
const MODULE_ID = 'state-store';
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
// 向量数据导入导出(当前 chatId 级别)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { zipSync, unzipSync, strToU8, strFromU8 } from '../../../libs/fflate.mjs';
|
||||
import { getContext } from '../../../../../../extensions.js';
|
||||
import { xbLog } from '../../../core/debug-core.js';
|
||||
import { zipSync, unzipSync, strToU8, strFromU8 } from '../../../../libs/fflate.mjs';
|
||||
import { getContext } from '../../../../../../../extensions.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import {
|
||||
getMeta,
|
||||
updateMeta,
|
||||
@@ -26,8 +26,8 @@ import {
|
||||
saveStateVectors,
|
||||
clearStateVectors,
|
||||
} from './state-store.js';
|
||||
import { getEngineFingerprint } from './embedder.js';
|
||||
import { getVectorConfig } from '../data/config.js';
|
||||
import { getEngineFingerprint } from '../utils/embedder.js';
|
||||
import { getVectorConfig } from '../../data/config.js';
|
||||
|
||||
const MODULE_ID = 'vector-io';
|
||||
const EXPORT_VERSION = 1;
|
||||
@@ -3,7 +3,7 @@
|
||||
// 统一的向量生成接口(本地模型 / 在线服务)
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { xbLog } from '../../../core/debug-core.js';
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
|
||||
const MODULE_ID = 'embedding';
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
// 跳过用户定义的「起始→结束」区间
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getTextFilterRules } from '../data/config.js';
|
||||
import { getTextFilterRules } from '../../data/config.js';
|
||||
|
||||
/**
|
||||
* 转义正则特殊字符
|
||||
Reference in New Issue
Block a user