Update story summary recall and prompt
This commit is contained in:
@@ -5,10 +5,11 @@ import { getContext, saveMetadataDebounced } from "../../../../../../extensions.
|
|||||||
import { chat_metadata } from "../../../../../../../script.js";
|
import { chat_metadata } from "../../../../../../../script.js";
|
||||||
import { EXT_ID } from "../../../core/constants.js";
|
import { EXT_ID } from "../../../core/constants.js";
|
||||||
import { xbLog } from "../../../core/debug-core.js";
|
import { xbLog } from "../../../core/debug-core.js";
|
||||||
import { clearEventVectors, deleteEventVectorsByIds } from "../vector/chunk-store.js";
|
import { clearEventVectors, deleteEventVectorsByIds } from "../vector/storage/chunk-store.js";
|
||||||
import { clearEventTextIndex } from '../vector/text-search.js';
|
import { clearEventTextIndex } from '../vector/retrieval/text-search.js';
|
||||||
|
|
||||||
const MODULE_ID = 'summaryStore';
|
const MODULE_ID = 'summaryStore';
|
||||||
|
const FACTS_LIMIT_PER_SUBJECT = 10;
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// 基础存取
|
// 基础存取
|
||||||
@@ -125,32 +126,26 @@ function getNextFactId(existingFacts) {
|
|||||||
export function mergeFacts(existingFacts, updates, floor) {
|
export function mergeFacts(existingFacts, updates, floor) {
|
||||||
const map = new Map();
|
const map = new Map();
|
||||||
|
|
||||||
// 加载现有 facts
|
|
||||||
for (const f of existingFacts || []) {
|
for (const f of existingFacts || []) {
|
||||||
if (!f.retracted) {
|
if (!f.retracted) {
|
||||||
map.set(factKey(f), f);
|
map.set(factKey(f), f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 获取下一个 ID
|
|
||||||
let nextId = getNextFactId(existingFacts);
|
let nextId = getNextFactId(existingFacts);
|
||||||
|
|
||||||
// 应用更新
|
|
||||||
for (const u of updates || []) {
|
for (const u of updates || []) {
|
||||||
if (!u.s || !u.p) continue;
|
if (!u.s || !u.p) continue;
|
||||||
|
|
||||||
const key = factKey(u);
|
const key = factKey(u);
|
||||||
|
|
||||||
// 删除操作
|
|
||||||
if (u.retracted === true) {
|
if (u.retracted === true) {
|
||||||
map.delete(key);
|
map.delete(key);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 无 o 则跳过
|
|
||||||
if (!u.o || !String(u.o).trim()) continue;
|
if (!u.o || !String(u.o).trim()) continue;
|
||||||
|
|
||||||
// 覆盖或新增
|
|
||||||
const existing = map.get(key);
|
const existing = map.get(key);
|
||||||
const newFact = {
|
const newFact = {
|
||||||
id: existing?.id || `f-${nextId++}`,
|
id: existing?.id || `f-${nextId++}`,
|
||||||
@@ -158,14 +153,13 @@ export function mergeFacts(existingFacts, updates, floor) {
|
|||||||
p: u.p.trim(),
|
p: u.p.trim(),
|
||||||
o: String(u.o).trim(),
|
o: String(u.o).trim(),
|
||||||
since: floor,
|
since: floor,
|
||||||
|
_isState: existing?._isState ?? !!u.isState,
|
||||||
};
|
};
|
||||||
|
|
||||||
// 关系类保留 trend
|
|
||||||
if (isRelationFact(newFact) && u.trend) {
|
if (isRelationFact(newFact) && u.trend) {
|
||||||
newFact.trend = u.trend;
|
newFact.trend = u.trend;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 保留原始 _addedAt(如果是更新)
|
|
||||||
if (existing?._addedAt != null) {
|
if (existing?._addedAt != null) {
|
||||||
newFact._addedAt = existing._addedAt;
|
newFact._addedAt = existing._addedAt;
|
||||||
} else {
|
} else {
|
||||||
@@ -175,9 +169,28 @@ export function mergeFacts(existingFacts, updates, floor) {
|
|||||||
map.set(key, newFact);
|
map.set(key, newFact);
|
||||||
}
|
}
|
||||||
|
|
||||||
return Array.from(map.values());
|
const factsBySubject = new Map();
|
||||||
|
for (const f of map.values()) {
|
||||||
|
if (f._isState) continue;
|
||||||
|
const arr = factsBySubject.get(f.s) || [];
|
||||||
|
arr.push(f);
|
||||||
|
factsBySubject.set(f.s, arr);
|
||||||
|
}
|
||||||
|
|
||||||
|
const toRemove = new Set();
|
||||||
|
for (const arr of factsBySubject.values()) {
|
||||||
|
if (arr.length > FACTS_LIMIT_PER_SUBJECT) {
|
||||||
|
arr.sort((a, b) => (a._addedAt || 0) - (b._addedAt || 0));
|
||||||
|
for (let i = 0; i < arr.length - FACTS_LIMIT_PER_SUBJECT; i++) {
|
||||||
|
toRemove.add(factKey(arr[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Array.from(map.values()).filter(f => !toRemove.has(factKey(f)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// 旧数据迁移
|
// 旧数据迁移
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|||||||
@@ -32,7 +32,6 @@ function sanitizeFacts(parsed) {
|
|||||||
|
|
||||||
if (!s || !pRaw) continue;
|
if (!s || !pRaw) continue;
|
||||||
|
|
||||||
// 删除操作
|
|
||||||
if (item.retracted === true) {
|
if (item.retracted === true) {
|
||||||
ok.push({ s, p: pRaw, retracted: true });
|
ok.push({ s, p: pRaw, retracted: true });
|
||||||
continue;
|
continue;
|
||||||
@@ -43,11 +42,15 @@ function sanitizeFacts(parsed) {
|
|||||||
|
|
||||||
const relP = normalizeRelationPredicate(pRaw);
|
const relP = normalizeRelationPredicate(pRaw);
|
||||||
const isRel = !!relP;
|
const isRel = !!relP;
|
||||||
const fact = { s, p: isRel ? relP : pRaw, o };
|
const fact = {
|
||||||
|
s,
|
||||||
|
p: isRel ? relP : pRaw,
|
||||||
|
o,
|
||||||
|
isState: !!item.isState,
|
||||||
|
};
|
||||||
|
|
||||||
// 关系类保留 trend
|
|
||||||
if (isRel && item.trend) {
|
if (isRel && item.trend) {
|
||||||
const validTrends = ['破裂', '厌恶', '反感', '陌生', '投缘', '亲密', '交融'];
|
const validTrends = ['破裂', '厌恶', '反感', '陌生', '投缘', '亲密', '交融'];
|
||||||
if (validTrends.includes(item.trend)) {
|
if (validTrends.includes(item.trend)) {
|
||||||
fact.trend = item.trend;
|
fact.trend = item.trend;
|
||||||
}
|
}
|
||||||
@@ -59,6 +62,7 @@ function sanitizeFacts(parsed) {
|
|||||||
parsed.factUpdates = ok;
|
parsed.factUpdates = ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// causedBy 清洗(事件因果边)
|
// causedBy 清洗(事件因果边)
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|||||||
@@ -100,14 +100,19 @@ Acknowledged. Now reviewing the incremental summarization specifications:
|
|||||||
├─ progress: 0.0 to 1.0
|
├─ progress: 0.0 to 1.0
|
||||||
└─ newMoment: 仅记录本次新增的关键时刻
|
└─ newMoment: 仅记录本次新增的关键时刻
|
||||||
|
|
||||||
[Fact Tracking - SPO Triples]
|
[Fact Tracking - SPO ???]
|
||||||
├─ s: 主体(角色名/物品名)
|
?? ??: ?? & ???????
|
||||||
├─ p: 谓词(属性名)
|
?? ??: ??????????????????
|
||||||
│ - 关系类只允许:对X的看法 / 与X的关系
|
?? SPO ??:
|
||||||
├─ o: 值(当前状态)
|
? s: ??????/????
|
||||||
├─ trend: 仅关系类填写
|
? p: ??????????????
|
||||||
├─ retracted: 删除标记
|
? o: ???
|
||||||
└─ s+p 为键,相同键会覆盖旧值
|
?? KV ??: s+p ??????????
|
||||||
|
?? isState ????????:
|
||||||
|
? true = ????????????/??/??/???
|
||||||
|
? false = ??????????????
|
||||||
|
?? trend: ?????????/??/??/??/??/??/???
|
||||||
|
?? retracted: true ???????
|
||||||
|
|
||||||
Ready to process incremental summary requests with strict deduplication.`,
|
Ready to process incremental summary requests with strict deduplication.`,
|
||||||
|
|
||||||
@@ -179,24 +184,26 @@ Before generating, observe the USER and analyze carefully:
|
|||||||
],
|
],
|
||||||
"factUpdates": [
|
"factUpdates": [
|
||||||
{
|
{
|
||||||
"s": "主体(角色名/物品名)",
|
"s": "主体",
|
||||||
"p": "谓词(属性名/对X的看法)",
|
"p": "谓词(复用已有谓词,避免同义词)",
|
||||||
"o": "当前值",
|
"o": "当前值",
|
||||||
"trend": "破裂|厌恶|反感|陌生|投缘|亲密|交融",
|
"isState": true/false,
|
||||||
"retracted": false
|
"trend": "仅关系类:破裂|厌恶|反感|陌生|投缘|亲密|交融"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
\`\`\`
|
\`\`\`
|
||||||
|
|
||||||
## factUpdates 规则
|
## factUpdates 规则
|
||||||
|
- 目的: 纠错 & 世界一致性约束,只记录硬性事实
|
||||||
- s+p 为键,相同键会覆盖旧值
|
- s+p 为键,相同键会覆盖旧值
|
||||||
- 状态类:s=角色名, p=属性(生死/位置/状态等), o=值
|
- isState: true=核心约束(位置/身份/生死/关系),false=有容量上限会被清理
|
||||||
- 关系类:s=角色A, p="对B的看法" 或 p="与B的关系",trend 仅限关系类
|
- 关系类: p="对X的看法",trend 必填
|
||||||
- 删除:设置 retracted: true(不需要填 o)
|
- 删除: 设置 retracted: true
|
||||||
- 只输出有变化的条目
|
- 谓词规范化: 复用已有谓词,不要发明同义词
|
||||||
- 硬约束才记录,避免叙事化,确保少、硬、稳定
|
- 只输出有变化的条目,确保少、硬、稳定
|
||||||
|
|
||||||
## CRITICAL NOTES
|
## CRITICAL NOTES
|
||||||
- events.id 从 evt-{nextEventId} 开始编号
|
- events.id 从 evt-{nextEventId} 开始编号
|
||||||
- 仅输出【增量】内容,已有事件绝不重复
|
- 仅输出【增量】内容,已有事件绝不重复
|
||||||
@@ -267,9 +274,11 @@ function waitForStreamingComplete(sessionId, streamingMod, timeout = 120000) {
|
|||||||
|
|
||||||
function formatFactsForLLM(facts) {
|
function formatFactsForLLM(facts) {
|
||||||
if (!facts?.length) {
|
if (!facts?.length) {
|
||||||
return '(空白,尚无事实记录)';
|
return { text: '(空白,尚无事实记录)', predicates: [] };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const predicates = [...new Set(facts.map(f => f.p).filter(Boolean))];
|
||||||
|
|
||||||
const lines = facts.map(f => {
|
const lines = facts.map(f => {
|
||||||
if (f.trend) {
|
if (f.trend) {
|
||||||
return `- ${f.s} | ${f.p} | ${f.o} [${f.trend}]`;
|
return `- ${f.s} | ${f.p} | ${f.o} [${f.trend}]`;
|
||||||
@@ -277,11 +286,18 @@ function formatFactsForLLM(facts) {
|
|||||||
return `- ${f.s} | ${f.p} | ${f.o}`;
|
return `- ${f.s} | ${f.p} | ${f.o}`;
|
||||||
});
|
});
|
||||||
|
|
||||||
return lines.join('\n') || '(空白,尚无事实记录)';
|
return {
|
||||||
|
text: lines.join('\n') || '(空白,尚无事实记录)',
|
||||||
|
predicates,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildSummaryMessages(existingSummary, existingFacts, newHistoryText, historyRange, nextEventId, existingEventCount) {
|
function buildSummaryMessages(existingSummary, existingFacts, newHistoryText, historyRange, nextEventId, existingEventCount) {
|
||||||
const factsText = formatFactsForLLM(existingFacts);
|
const { text: factsText, predicates } = formatFactsForLLM(existingFacts);
|
||||||
|
|
||||||
|
const predicatesHint = predicates.length > 0
|
||||||
|
? `\n\n<\u5df2\u6709\u8c13\u8bcd\uff0c\u8bf7\u590d\u7528>\n${predicates.join('\u3001')}\n</\u5df2\u6709\u8c13\u8bcd\uff0c\u8bf7\u590d\u7528>`
|
||||||
|
: '';
|
||||||
|
|
||||||
const jsonFormat = LLM_PROMPT_CONFIG.userJsonFormat
|
const jsonFormat = LLM_PROMPT_CONFIG.userJsonFormat
|
||||||
.replace(/\{nextEventId\}/g, String(nextEventId));
|
.replace(/\{nextEventId\}/g, String(nextEventId));
|
||||||
@@ -293,9 +309,9 @@ function buildSummaryMessages(existingSummary, existingFacts, newHistoryText, hi
|
|||||||
{ role: 'system', content: LLM_PROMPT_CONFIG.topSystem },
|
{ role: 'system', content: LLM_PROMPT_CONFIG.topSystem },
|
||||||
{ role: 'assistant', content: LLM_PROMPT_CONFIG.assistantDoc },
|
{ role: 'assistant', content: LLM_PROMPT_CONFIG.assistantDoc },
|
||||||
{ role: 'assistant', content: LLM_PROMPT_CONFIG.assistantAskSummary },
|
{ role: 'assistant', content: LLM_PROMPT_CONFIG.assistantAskSummary },
|
||||||
{ role: 'user', content: `<已有总结状态>\n${existingSummary}\n</已有总结状态>\n\n<当前事实图谱>\n${factsText}\n</当前事实图谱>` },
|
{ role: 'user', content: `<\u5df2\u6709\u603b\u7ed3\u72b6\u6001>\n${existingSummary}\n</\u5df2\u6709\u603b\u7ed3\u72b6\u6001>\n\n<\u5f53\u524d\u4e8b\u5b9e\u56fe\u8c31>\n${factsText}\n</\u5f53\u524d\u4e8b\u5b9e\u56fe\u8c31>${predicatesHint}` },
|
||||||
{ role: 'assistant', content: LLM_PROMPT_CONFIG.assistantAskContent },
|
{ role: 'assistant', content: LLM_PROMPT_CONFIG.assistantAskContent },
|
||||||
{ role: 'user', content: `<新对话内容>(${historyRange})\n${newHistoryText}\n</新对话内容>` }
|
{ role: 'user', content: `<\u65b0\u5bf9\u8bdd\u5185\u5bb9>\uff08${historyRange}\uff09\n${newHistoryText}\n</\u65b0\u5bf9\u8bdd\u5185\u5bb9>` }
|
||||||
];
|
];
|
||||||
|
|
||||||
const bottomMessages = [
|
const bottomMessages = [
|
||||||
@@ -311,6 +327,7 @@ function buildSummaryMessages(existingSummary, existingFacts, newHistoryText, hi
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// JSON 解析
|
// JSON 解析
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// Story Summary - Prompt Injection (Final Clean Version)
|
// Story Summary - Prompt Injection (Final Clean Version)
|
||||||
// - 仅负责“构建注入文本”,不负责写入 extension_prompts
|
// - 仅负责"构建注入文本",不负责写入 extension_prompts
|
||||||
// - 注入发生在 story-summary.js:GENERATION_STARTED 时写入 extension_prompts(IN_CHAT + depth)
|
// - 注入发生在 story-summary.js:GENERATION_STARTED 时写入 extension_prompts(IN_CHAT + depth)
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
@@ -8,8 +8,8 @@ import { getContext } from "../../../../../../extensions.js";
|
|||||||
import { xbLog } from "../../../core/debug-core.js";
|
import { xbLog } from "../../../core/debug-core.js";
|
||||||
import { getSummaryStore, getFacts, isRelationFact } from "../data/store.js";
|
import { getSummaryStore, getFacts, isRelationFact } from "../data/store.js";
|
||||||
import { getVectorConfig, getSummaryPanelConfig, getSettings } from "../data/config.js";
|
import { getVectorConfig, getSummaryPanelConfig, getSettings } from "../data/config.js";
|
||||||
import { recallMemory, buildQueryText } from "../vector/recall.js";
|
import { recallMemory, buildQueryText } from "../vector/retrieval/recall.js";
|
||||||
import { getChunksByFloors, getAllChunkVectors, getAllEventVectors, getMeta } from "../vector/chunk-store.js";
|
import { getChunksByFloors, getAllChunkVectors, getAllEventVectors, getMeta } from "../vector/storage/chunk-store.js";
|
||||||
|
|
||||||
const MODULE_ID = "summaryPrompt";
|
const MODULE_ID = "summaryPrompt";
|
||||||
|
|
||||||
@@ -85,6 +85,49 @@ function cleanSummary(summary) {
|
|||||||
.trim();
|
.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
// 上下文配对工具函数
|
||||||
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Resolve the floor whose chunk should be displayed next to `chunk` as context.
 *
 * - L0 (virtual) chunks are never paired and yield -1.
 * - A user chunk pairs with the following floor (`floor + 1`).
 * - Any other chunk pairs with the preceding floor (`floor - 1`).
 *
 * @param {{ isL0?: boolean, isUser?: boolean, floor: number }} chunk - Chunk to find a partner floor for.
 * @returns {number} The partner floor index, or -1 for L0 chunks. A non-user chunk
 *   on floor 0 also produces -1; the visible callers only use results that are >= 0.
 */
function getContextFloor(chunk) {
    // L0 virtual chunks have no conversational counterpart.
    if (chunk.isL0) {
        return -1;
    }

    if (chunk.isUser) {
        return chunk.floor + 1;
    }
    return chunk.floor - 1;
}
|
||||||
|
|
||||||
|
/**
 * Choose the chunk to show as paired context for `mainChunk`.
 *
 * Preference order:
 *   1. the first candidate spoken by the opposite role (user vs. non-user);
 *   2. otherwise the first candidate.
 *
 * `isUser` is compared by truthiness on both sides, so a candidate whose
 * `isUser` flag is missing (or stored as 0/1) is still classified correctly.
 *
 * @param {Array<{ isUser?: boolean }>|null|undefined} candidates - Chunks found on the partner floor.
 * @param {{ isUser?: boolean }} mainChunk - The chunk being paired.
 * @returns {object|null} The selected candidate, or null when there are no candidates.
 */
function pickContextChunk(candidates, mainChunk) {
    if (!candidates?.length) return null;

    // The partner should come from the other side of the conversation.
    const wantUser = !mainChunk.isUser;
    const opposite = candidates.find((candidate) => Boolean(candidate.isUser) === wantUser);

    // No opposite-role chunk on that floor: fall back to the first one available.
    return opposite ?? candidates[0];
}
|
||||||
|
/**
 * Render a paired context chunk as a single line, prefixed with a corner glyph
 * that shows whether it sits above or below the main chunk.
 *
 * Speaker label: for user chunks, `name1` from `getContext()` (fallback "用户");
 * otherwise `chunk.speaker`, then `name2` from `getContext()`, then "角色".
 * The floor is printed 1-based (`chunk.floor + 1`) and the text is trimmed.
 *
 * @param {{ isUser?: boolean, speaker?: string, text?: string, floor: number }} chunk - Context chunk to render.
 * @param {boolean} isAbove - True to use "┌" (context precedes the main chunk), false for "└".
 * @returns {string} The formatted line.
 */
function formatContextChunkLine(chunk, isAbove) {
    const { name1, name2 } = getContext();

    let speaker;
    if (chunk.isUser) {
        speaker = name1 || "用户";
    } else {
        speaker = chunk.speaker || name2 || "角色";
    }

    const body = String(chunk.text || "").trim();
    const corner = isAbove ? "┌" : "└";

    return ` ${corner} #${chunk.floor + 1} [${speaker}] ${body}`;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 格式化配对chunk(缩进,简短摘要)
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
// ─────────────────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
// 系统前导与后缀
|
// 系统前导与后缀
|
||||||
// ─────────────────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
@@ -150,7 +193,31 @@ function formatChunkFullLine(c) {
|
|||||||
return `› #${c.floor + 1} [${speaker}] ${String(c.text || "").trim()}`;
|
return `› #${c.floor + 1} [${speaker}] ${String(c.text || "").trim()}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 因果事件格式(仅作为“前因线索”展示,仍保留楼层提示)
|
/**
 * Format a chunk together with its optional paired context chunk.
 *
 * The main chunk is rendered with `formatChunkFullLine`; the context chunk, if
 * any, with `formatContextChunkLine`. Lines are ordered by floor: a context
 * chunk on a lower floor is placed before the main line, otherwise after it.
 *
 * @param {{ floor: number }} mainChunk - The chunk being injected.
 * @param {{ floor: number }|null|undefined} contextChunk - Paired chunk, or a falsy value when none was found.
 * @returns {string[]} One line (no context) or two lines (context + main, in floor order).
 */
function formatChunkWithContext(mainChunk, contextChunk) {
    const mainLine = formatChunkFullLine(mainChunk);

    if (!contextChunk) {
        return [mainLine];
    }

    const contextIsAbove = contextChunk.floor < mainChunk.floor;
    const contextLine = formatContextChunkLine(contextChunk, contextIsAbove);

    return contextIsAbove ? [contextLine, mainLine] : [mainLine, contextLine];
}
|
||||||
|
|
||||||
|
// 因果事件格式(仅作为"前因线索"展示,仍保留楼层提示)
|
||||||
function formatCausalEventLine(causalItem, causalById) {
|
function formatCausalEventLine(causalItem, causalById) {
|
||||||
const ev = causalItem?.event || {};
|
const ev = causalItem?.event || {};
|
||||||
const depth = Math.max(1, Math.min(9, causalItem?._causalDepth || 1));
|
const depth = Math.max(1, Math.min(9, causalItem?._causalDepth || 1));
|
||||||
@@ -172,9 +239,8 @@ function formatCausalEventLine(causalItem, causalById) {
|
|||||||
const evidence = causalItem._evidenceChunk;
|
const evidence = causalItem._evidenceChunk;
|
||||||
if (evidence) {
|
if (evidence) {
|
||||||
const speaker = evidence.speaker || "角色";
|
const speaker = evidence.speaker || "角色";
|
||||||
const preview = String(evidence.text || "");
|
const text = String(evidence.text || "").trim();
|
||||||
const clip = preview.length > 60 ? preview.slice(0, 60) + "..." : preview;
|
lines.push(`${indent} › #${evidence.floor + 1} [${speaker}] ${text}`);
|
||||||
lines.push(`${indent} › #${evidence.floor + 1} [${speaker}] ${clip}`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return lines.join("\n");
|
return lines.join("\n");
|
||||||
@@ -216,11 +282,13 @@ function formatInjectionLog(stats, details, recentOrphanStats = null) {
|
|||||||
const l1OrphanCount = (stats.orphans.injected || 0) - l0OrphanCount;
|
const l1OrphanCount = (stats.orphans.injected || 0) - l0OrphanCount;
|
||||||
lines.push(` [3] 远期片段 (已总结范围)`);
|
lines.push(` [3] 远期片段 (已总结范围)`);
|
||||||
lines.push(` 选入: ${stats.orphans.injected} 条 (L0: ${l0OrphanCount}, L1: ${l1OrphanCount}) | 消耗: ${stats.orphans.tokens} tokens`);
|
lines.push(` 选入: ${stats.orphans.injected} 条 (L0: ${l0OrphanCount}, L1: ${l1OrphanCount}) | 消耗: ${stats.orphans.tokens} tokens`);
|
||||||
|
lines.push(` 配对: ${stats.orphans.contextPairs || 0} 条`);
|
||||||
lines.push('');
|
lines.push('');
|
||||||
|
|
||||||
// [4] 待整理
|
// [4] 待整理
|
||||||
lines.push(` [4] 待整理 (独立预算 5000)`);
|
lines.push(` [4] 待整理 (独立预算 5000)`);
|
||||||
lines.push(` 选入: ${recentOrphanStats?.injected || 0} 条 | 消耗: ${recentOrphanStats?.tokens || 0} tokens`);
|
lines.push(` 选入: ${recentOrphanStats?.injected || 0} 条 | 消耗: ${recentOrphanStats?.tokens || 0} tokens`);
|
||||||
|
lines.push(` 配对: ${recentOrphanStats?.contextPairs || 0} 条`);
|
||||||
lines.push(` 楼层: ${recentOrphanStats?.floorRange || 'N/A'}`);
|
lines.push(` 楼层: ${recentOrphanStats?.floorRange || 'N/A'}`);
|
||||||
lines.push('');
|
lines.push('');
|
||||||
|
|
||||||
@@ -248,7 +316,7 @@ function formatInjectionLog(stats, details, recentOrphanStats = null) {
|
|||||||
return lines.join('\n');
|
return lines.join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
// 重写事件文本里的序号前缀:把 “{idx}. ” 或 “{idx}.【...】” 的 idx 替换
|
// 重写事件文本里的序号前缀:把 "{idx}. " 或 "{idx}.【...】" 的 idx 替换
|
||||||
function renumberEventText(text, newIndex) {
|
function renumberEventText(text, newIndex) {
|
||||||
const s = String(text || "");
|
const s = String(text || "");
|
||||||
// 匹配行首: "12." 或 "12.【"
|
// 匹配行首: "12." 或 "12.【"
|
||||||
@@ -325,11 +393,12 @@ export function buildNonVectorPromptText() {
|
|||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
// 向量模式:预算装配(世界 → 事件(带证据) → 碎片 → 弧光)
|
// 向量模式:预算装配(世界 → 事件(带证据) → 碎片 → 弧光)
|
||||||
// ─────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
async function buildVectorPrompt(store, recallResult, causalById, queryEntities = [], meta = null) {
|
async function buildVectorPrompt(store, recallResult, causalById, queryEntities = [], meta = null) {
|
||||||
|
const { chatId } = getContext();
|
||||||
const data = store.json || {};
|
const data = store.json || {};
|
||||||
const total = { used: 0, max: MAIN_BUDGET_MAX };
|
const total = { used: 0, max: MAIN_BUDGET_MAX };
|
||||||
|
|
||||||
@@ -351,13 +420,14 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
|
|||||||
arcs: { count: 0, tokens: 0 },
|
arcs: { count: 0, tokens: 0 },
|
||||||
events: { selected: 0, tokens: 0 },
|
events: { selected: 0, tokens: 0 },
|
||||||
evidence: { attached: 0, tokens: 0 },
|
evidence: { attached: 0, tokens: 0 },
|
||||||
orphans: { injected: 0, tokens: 0 },
|
orphans: { injected: 0, tokens: 0, l0Count: 0, contextPairs: 0 },
|
||||||
};
|
};
|
||||||
|
|
||||||
const recentOrphanStats = {
|
const recentOrphanStats = {
|
||||||
injected: 0,
|
injected: 0,
|
||||||
tokens: 0,
|
tokens: 0,
|
||||||
floorRange: "N/A",
|
floorRange: "N/A",
|
||||||
|
contextPairs: 0,
|
||||||
};
|
};
|
||||||
const details = {
|
const details = {
|
||||||
eventList: [],
|
eventList: [],
|
||||||
@@ -473,14 +543,14 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
|
|||||||
|
|
||||||
const bestChunk = pickBestChunkForEvent(e.event);
|
const bestChunk = pickBestChunkForEvent(e.event);
|
||||||
|
|
||||||
// 先尝试“带证据”
|
// 先尝试"带证据"
|
||||||
// idx 先占位写 0,后面统一按时间线重排后再改号
|
// idx 先占位写 0,后面统一按时间线重排后再改号
|
||||||
let text = formatEventWithEvidence(e, 0, bestChunk);
|
let text = formatEventWithEvidence(e, 0, bestChunk);
|
||||||
let cost = estimateTokens(text);
|
let cost = estimateTokens(text);
|
||||||
let hasEvidence = !!bestChunk;
|
let hasEvidence = !!bestChunk;
|
||||||
let chosenChunk = bestChunk || null;
|
let chosenChunk = bestChunk || null;
|
||||||
|
|
||||||
// 塞不下就退化成“不带证据”
|
// 塞不下就退化成"不带证据"
|
||||||
if (total.used + cost > total.max) {
|
if (total.used + cost > total.max) {
|
||||||
text = formatEventWithEvidence(e, 0, null);
|
text = formatEventWithEvidence(e, 0, null);
|
||||||
cost = estimateTokens(text);
|
cost = estimateTokens(text);
|
||||||
@@ -549,33 +619,90 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
|
|||||||
assembled.events.similar = selectedSimilarTexts;
|
assembled.events.similar = selectedSimilarTexts;
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════
|
||||||
// [优先级 4] 远期片段(已总结范围的 orphan chunks)
|
// [优先级 4] 远期片段(已总结范围的 orphan chunks)- 带上下文配对
|
||||||
// ═══════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════
|
||||||
const lastSummarized = store.lastSummarizedMesId ?? -1;
|
const lastSummarized = store.lastSummarizedMesId ?? -1;
|
||||||
const lastChunkFloor = meta?.lastChunkFloor ?? -1;
|
const lastChunkFloor = meta?.lastChunkFloor ?? -1;
|
||||||
const keepVisible = store.keepVisibleCount ?? 3;
|
const keepVisible = store.keepVisibleCount ?? 3;
|
||||||
|
|
||||||
if (chunks.length && total.used < total.max) {
|
// 收集需要配对的楼层
|
||||||
const orphans = chunks
|
const orphanContextFloors = new Set();
|
||||||
|
const orphanCandidates = chunks
|
||||||
.filter(c => !usedChunkIds.has(c.chunkId))
|
.filter(c => !usedChunkIds.has(c.chunkId))
|
||||||
.filter(c => c.floor <= lastSummarized)
|
.filter(c => c.floor <= lastSummarized);
|
||||||
|
|
||||||
|
for (const c of orphanCandidates) {
|
||||||
|
if (c.isL0) continue;
|
||||||
|
const pairFloor = getContextFloor(c);
|
||||||
|
if (pairFloor >= 0) orphanContextFloors.add(pairFloor);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 批量获取配对楼层的chunks
|
||||||
|
let contextChunksByFloor = new Map();
|
||||||
|
if (chatId && orphanContextFloors.size > 0) {
|
||||||
|
try {
|
||||||
|
const contextChunks = await getChunksByFloors(chatId, Array.from(orphanContextFloors));
|
||||||
|
for (const pc of contextChunks) {
|
||||||
|
if (!contextChunksByFloor.has(pc.floor)) {
|
||||||
|
contextChunksByFloor.set(pc.floor, []);
|
||||||
|
}
|
||||||
|
contextChunksByFloor.get(pc.floor).push(pc);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
xbLog.warn(MODULE_ID, "获取配对chunks失败", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (orphanCandidates.length && total.used < total.max) {
|
||||||
|
const orphans = orphanCandidates
|
||||||
.sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0)));
|
.sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0)));
|
||||||
|
|
||||||
const l1Budget = { used: 0, max: total.max - total.used };
|
const l1Budget = { used: 0, max: total.max - total.used };
|
||||||
|
let l0Count = 0;
|
||||||
|
let contextPairsCount = 0;
|
||||||
|
|
||||||
for (const c of orphans) {
|
for (const c of orphans) {
|
||||||
|
// L0 不需要配对
|
||||||
|
if (c.isL0) {
|
||||||
const line = formatChunkFullLine(c);
|
const line = formatChunkFullLine(c);
|
||||||
if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) break;
|
if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) break;
|
||||||
injectionStats.orphans.injected++;
|
injectionStats.orphans.injected++;
|
||||||
|
l0Count++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 获取配对chunk
|
||||||
|
const pairFloor = getContextFloor(c);
|
||||||
|
const candidates = contextChunksByFloor.get(pairFloor) || [];
|
||||||
|
const contextChunk = pickContextChunk(candidates, c);
|
||||||
|
|
||||||
|
// 格式化(带配对)
|
||||||
|
const formattedLines = formatChunkWithContext(c, contextChunk);
|
||||||
|
|
||||||
|
// 尝试添加所有行
|
||||||
|
let allAdded = true;
|
||||||
|
for (const line of formattedLines) {
|
||||||
|
if (!pushWithBudget(assembled.orphans.lines, line, l1Budget)) {
|
||||||
|
allAdded = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!allAdded) break;
|
||||||
|
|
||||||
|
injectionStats.orphans.injected++;
|
||||||
|
if (contextChunk) contextPairsCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
assembled.orphans.tokens = l1Budget.used;
|
assembled.orphans.tokens = l1Budget.used;
|
||||||
total.used += l1Budget.used;
|
total.used += l1Budget.used;
|
||||||
injectionStats.orphans.tokens = l1Budget.used;
|
injectionStats.orphans.tokens = l1Budget.used;
|
||||||
|
injectionStats.orphans.l0Count = l0Count;
|
||||||
|
injectionStats.orphans.contextPairs = contextPairsCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════
|
||||||
// [独立预算] 待整理(未总结范围,独立 5000)
|
// [独立预算] 待整理(未总结范围,独立 5000)- 带上下文配对
|
||||||
// ═══════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
// 近期范围:(lastSummarized, lastChunkFloor - keepVisible]
|
// 近期范围:(lastSummarized, lastChunkFloor - keepVisible]
|
||||||
@@ -583,55 +710,113 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
|
|||||||
const recentEnd = lastChunkFloor - keepVisible;
|
const recentEnd = lastChunkFloor - keepVisible;
|
||||||
|
|
||||||
if (chunks.length && recentEnd >= recentStart) {
|
if (chunks.length && recentEnd >= recentStart) {
|
||||||
const recentOrphans = chunks
|
const recentOrphanCandidates = chunks
|
||||||
.filter(c => !usedChunkIds.has(c.chunkId))
|
.filter(c => !usedChunkIds.has(c.chunkId))
|
||||||
.filter(c => c.floor >= recentStart && c.floor <= recentEnd)
|
.filter(c => c.floor >= recentStart && c.floor <= recentEnd);
|
||||||
|
|
||||||
|
// 收集近期范围需要配对的楼层
|
||||||
|
const recentContextFloors = new Set();
|
||||||
|
for (const c of recentOrphanCandidates) {
|
||||||
|
if (c.isL0) continue;
|
||||||
|
const pairFloor = getContextFloor(c);
|
||||||
|
if (pairFloor >= 0) recentContextFloors.add(pairFloor);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 批量获取(复用已有的 or 新获取)
|
||||||
|
let recentContextChunksByFloor = new Map();
|
||||||
|
if (chatId && recentContextFloors.size > 0) {
|
||||||
|
// 过滤掉已经获取过的
|
||||||
|
const newFloors = Array.from(recentContextFloors).filter(f => !contextChunksByFloor.has(f));
|
||||||
|
if (newFloors.length > 0) {
|
||||||
|
try {
|
||||||
|
const newContextChunks = await getChunksByFloors(chatId, newFloors);
|
||||||
|
for (const pc of newContextChunks) {
|
||||||
|
if (!contextChunksByFloor.has(pc.floor)) {
|
||||||
|
contextChunksByFloor.set(pc.floor, []);
|
||||||
|
}
|
||||||
|
contextChunksByFloor.get(pc.floor).push(pc);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
xbLog.warn(MODULE_ID, "获取近期配对chunks失败", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
recentContextChunksByFloor = contextChunksByFloor;
|
||||||
|
}
|
||||||
|
|
||||||
|
const recentOrphans = recentOrphanCandidates
|
||||||
.sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0)));
|
.sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0)));
|
||||||
|
|
||||||
const recentBudget = { used: 0, max: RECENT_ORPHAN_MAX };
|
const recentBudget = { used: 0, max: RECENT_ORPHAN_MAX };
|
||||||
|
let recentContextPairsCount = 0;
|
||||||
|
|
||||||
for (const c of recentOrphans) {
|
for (const c of recentOrphans) {
|
||||||
|
// L0 不需要配对
|
||||||
|
if (c.isL0) {
|
||||||
const line = formatChunkFullLine(c);
|
const line = formatChunkFullLine(c);
|
||||||
if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) break;
|
if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) break;
|
||||||
recentOrphanStats.injected++;
|
recentOrphanStats.injected++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 获取配对chunk
|
||||||
|
const pairFloor = getContextFloor(c);
|
||||||
|
const candidates = recentContextChunksByFloor.get(pairFloor) || [];
|
||||||
|
const contextChunk = pickContextChunk(candidates, c);
|
||||||
|
|
||||||
|
// 格式化(带配对)
|
||||||
|
const formattedLines = formatChunkWithContext(c, contextChunk);
|
||||||
|
|
||||||
|
// 尝试添加所有行
|
||||||
|
let allAdded = true;
|
||||||
|
for (const line of formattedLines) {
|
||||||
|
if (!pushWithBudget(assembled.recentOrphans.lines, line, recentBudget)) {
|
||||||
|
allAdded = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!allAdded) break;
|
||||||
|
|
||||||
|
recentOrphanStats.injected++;
|
||||||
|
if (contextChunk) recentContextPairsCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
assembled.recentOrphans.tokens = recentBudget.used;
|
assembled.recentOrphans.tokens = recentBudget.used;
|
||||||
recentOrphanStats.tokens = recentBudget.used;
|
recentOrphanStats.tokens = recentBudget.used;
|
||||||
recentOrphanStats.floorRange = `${recentStart + 1}~${recentEnd + 1}楼`;
|
recentOrphanStats.floorRange = `${recentStart + 1}~${recentEnd + 1}楼`;
|
||||||
|
recentOrphanStats.contextPairs = recentContextPairsCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════
|
||||||
// ═══════════════════════════════════════════════════════════════════════
|
// 按注入顺序拼接 sections
|
||||||
// 按注入顺序拼接 sections
|
// ═══════════════════════════════════════════════════════════════════
|
||||||
// ═══════════════════════════════════════════════════════════════════════
|
const sections = [];
|
||||||
const sections = [];
|
// 1. 世界约束 → 定了的事
|
||||||
// 1. 世界约束 → 定了的事
|
if (assembled.facts.lines.length) {
|
||||||
if (assembled.facts.lines.length) {
|
|
||||||
sections.push(`[定了的事] 已确立的事实\n${assembled.facts.lines.join("\n")}`);
|
sections.push(`[定了的事] 已确立的事实\n${assembled.facts.lines.join("\n")}`);
|
||||||
}
|
}
|
||||||
// 2. 核心经历 → 印象深的事
|
// 2. 核心经历 → 印象深的事
|
||||||
if (assembled.events.direct.length) {
|
if (assembled.events.direct.length) {
|
||||||
sections.push(`[印象深的事] 记得很清楚\n\n${assembled.events.direct.join("\n\n")}`);
|
sections.push(`[印象深的事] 记得很清楚\n\n${assembled.events.direct.join("\n\n")}`);
|
||||||
}
|
}
|
||||||
// 3. 过往背景 → 好像有关的事
|
// 3. 过往背景 → 好像有关的事
|
||||||
if (assembled.events.similar.length) {
|
if (assembled.events.similar.length) {
|
||||||
sections.push(`[好像有关的事] 听说过或有点模糊\n\n${assembled.events.similar.join("\n\n")}`);
|
sections.push(`[好像有关的事] 听说过或有点模糊\n\n${assembled.events.similar.join("\n\n")}`);
|
||||||
}
|
}
|
||||||
// 4. 远期片段 → 更早以前
|
// 4. 远期片段 → 更早以前
|
||||||
if (assembled.orphans.lines.length) {
|
if (assembled.orphans.lines.length) {
|
||||||
sections.push(`[更早以前] 记忆里残留的老画面\n${assembled.orphans.lines.join("\n")}`);
|
sections.push(`[更早以前] 记忆里残留的老画面\n${assembled.orphans.lines.join("\n")}`);
|
||||||
}
|
}
|
||||||
// 5. 待整理 → 刚发生的
|
// 5. 待整理 → 刚发生的
|
||||||
if (assembled.recentOrphans.lines.length) {
|
if (assembled.recentOrphans.lines.length) {
|
||||||
sections.push(`[刚发生的] 还没来得及想明白\n${assembled.recentOrphans.lines.join("\n")}`);
|
sections.push(`[刚发生的] 还没来得及想明白\n${assembled.recentOrphans.lines.join("\n")}`);
|
||||||
}
|
}
|
||||||
// 6. 人物弧光 → 这些人
|
// 6. 人物弧光 → 这些人
|
||||||
if (assembled.arcs.lines.length) {
|
if (assembled.arcs.lines.length) {
|
||||||
sections.push(`[这些人] 他们现在怎样了\n${assembled.arcs.lines.join("\n")}`);
|
sections.push(`[这些人] 他们现在怎样了\n${assembled.arcs.lines.join("\n")}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!sections.length) {
|
if (!sections.length) {
|
||||||
return { promptText: "", injectionLogText: "", injectionStats };
|
return { promptText: "", injectionLogText: "", injectionStats };
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -846,3 +1031,4 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) {
|
|||||||
|
|
||||||
return { text: finalText, logText: (recallResult.logText || "") + (injectionLogText || "") };
|
return { text: finalText, logText: (recallResult.logText || "") + (injectionLogText || "") };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ import {
|
|||||||
fetchOnlineModels,
|
fetchOnlineModels,
|
||||||
isLocalModelLoaded,
|
isLocalModelLoaded,
|
||||||
DEFAULT_LOCAL_MODEL,
|
DEFAULT_LOCAL_MODEL,
|
||||||
} from "./vector/embedder.js";
|
} from "./vector/utils/embedder.js";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
getMeta,
|
getMeta,
|
||||||
@@ -66,7 +66,7 @@ import {
|
|||||||
saveChunks,
|
saveChunks,
|
||||||
saveChunkVectors,
|
saveChunkVectors,
|
||||||
getStorageStats,
|
getStorageStats,
|
||||||
} from "./vector/chunk-store.js";
|
} from "./vector/storage/chunk-store.js";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
buildIncrementalChunks,
|
buildIncrementalChunks,
|
||||||
@@ -75,12 +75,12 @@ import {
|
|||||||
syncOnMessageDeleted,
|
syncOnMessageDeleted,
|
||||||
syncOnMessageSwiped,
|
syncOnMessageSwiped,
|
||||||
syncOnMessageReceived,
|
syncOnMessageReceived,
|
||||||
} from "./vector/chunk-builder.js";
|
} from "./vector/pipeline/chunk-builder.js";
|
||||||
import { initStateIntegration, rebuildStateVectors } from "./vector/state-integration.js";
|
import { initStateIntegration, rebuildStateVectors } from "./vector/pipeline/state-integration.js";
|
||||||
import { clearStateVectors, getStateAtomsCount, getStateVectorsCount } from "./vector/state-store.js";
|
import { clearStateVectors, getStateAtomsCount, getStateVectorsCount } from "./vector/storage/state-store.js";
|
||||||
|
|
||||||
// vector io
|
// vector io
|
||||||
import { exportVectors, importVectors } from "./vector/vector-io.js";
|
import { exportVectors, importVectors } from "./vector/storage/vector-io.js";
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// 常量
|
// 常量
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
// 标准 RAG chunking: ~200 tokens per chunk
|
// 标准 RAG chunking: ~200 tokens per chunk
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
import { getContext } from '../../../../../../extensions.js';
|
import { getContext } from '../../../../../../../extensions.js';
|
||||||
import {
|
import {
|
||||||
getMeta,
|
getMeta,
|
||||||
updateMeta,
|
updateMeta,
|
||||||
@@ -15,10 +15,10 @@ import {
|
|||||||
makeChunkId,
|
makeChunkId,
|
||||||
hashText,
|
hashText,
|
||||||
CHUNK_MAX_TOKENS,
|
CHUNK_MAX_TOKENS,
|
||||||
} from './chunk-store.js';
|
} from '../storage/chunk-store.js';
|
||||||
import { embed, getEngineFingerprint } from './embedder.js';
|
import { embed, getEngineFingerprint } from '../utils/embedder.js';
|
||||||
import { xbLog } from '../../../core/debug-core.js';
|
import { xbLog } from '../../../../core/debug-core.js';
|
||||||
import { filterText } from './text-filter.js';
|
import { filterText } from '../utils/text-filter.js';
|
||||||
|
|
||||||
const MODULE_ID = 'chunk-builder';
|
const MODULE_ID = 'chunk-builder';
|
||||||
|
|
||||||
@@ -339,7 +339,7 @@ export async function syncOnMessageReceived(chatId, lastFloor, message, vectorCo
|
|||||||
|
|
||||||
// 本地模型未加载时跳过(避免意外触发下载或报错)
|
// 本地模型未加载时跳过(避免意外触发下载或报错)
|
||||||
if (vectorConfig.engine === "local") {
|
if (vectorConfig.engine === "local") {
|
||||||
const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("./embedder.js");
|
const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("../utils/embedder.js");
|
||||||
const modelId = vectorConfig.local?.modelId || DEFAULT_LOCAL_MODEL;
|
const modelId = vectorConfig.local?.modelId || DEFAULT_LOCAL_MODEL;
|
||||||
if (!isLocalModelLoaded(modelId)) return;
|
if (!isLocalModelLoaded(modelId)) return;
|
||||||
}
|
}
|
||||||
@@ -3,8 +3,8 @@
|
|||||||
// 事件监听 + 回滚钩子注册
|
// 事件监听 + 回滚钩子注册
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
import { getContext } from '../../../../../../extensions.js';
|
import { getContext } from '../../../../../../../extensions.js';
|
||||||
import { xbLog } from '../../../core/debug-core.js';
|
import { xbLog } from '../../../../core/debug-core.js';
|
||||||
import {
|
import {
|
||||||
saveStateAtoms,
|
saveStateAtoms,
|
||||||
saveStateVectors,
|
saveStateVectors,
|
||||||
@@ -12,9 +12,9 @@ import {
|
|||||||
deleteStateVectorsFromFloor,
|
deleteStateVectorsFromFloor,
|
||||||
getStateAtoms,
|
getStateAtoms,
|
||||||
clearStateVectors,
|
clearStateVectors,
|
||||||
} from './state-store.js';
|
} from '../storage/state-store.js';
|
||||||
import { embed, getEngineFingerprint } from './embedder.js';
|
import { embed, getEngineFingerprint } from '../utils/embedder.js';
|
||||||
import { getVectorConfig } from '../data/config.js';
|
import { getVectorConfig } from '../../data/config.js';
|
||||||
|
|
||||||
const MODULE_ID = 'state-integration';
|
const MODULE_ID = 'state-integration';
|
||||||
|
|
||||||
@@ -3,11 +3,11 @@
|
|||||||
// L0 语义锚点召回 + floor bonus + 虚拟 chunk 转换
|
// L0 语义锚点召回 + floor bonus + 虚拟 chunk 转换
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
import { getContext } from '../../../../../../extensions.js';
|
import { getContext } from '../../../../../../../extensions.js';
|
||||||
import { getAllStateVectors, getStateAtoms } from './state-store.js';
|
import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js';
|
||||||
import { getMeta } from './chunk-store.js';
|
import { getMeta } from '../storage/chunk-store.js';
|
||||||
import { getEngineFingerprint } from './embedder.js';
|
import { getEngineFingerprint } from '../utils/embedder.js';
|
||||||
import { xbLog } from '../../../core/debug-core.js';
|
import { xbLog } from '../../../../core/debug-core.js';
|
||||||
|
|
||||||
const MODULE_ID = 'state-recall';
|
const MODULE_ID = 'state-recall';
|
||||||
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
// Story Summary - Recall Engine
|
// Story Summary - Recall Engine
|
||||||
// L1 chunk + L2 event 召回
|
// L1 chunk + L2 event 召回
|
||||||
// - 全量向量打分
|
// - 全量向量打分
|
||||||
// - 实体权重归一化分配
|
// - 实体权重归一化分配
|
||||||
@@ -8,19 +8,19 @@
|
|||||||
// - MMR 去重(融合后执行)
|
// - MMR 去重(融合后执行)
|
||||||
// - floor 稀疏去重
|
// - floor 稀疏去重
|
||||||
|
|
||||||
import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js';
|
import { getAllChunks, getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from '../storage/chunk-store.js';
|
||||||
import { embed, getEngineFingerprint } from './embedder.js';
|
import { embed, getEngineFingerprint } from '../utils/embedder.js';
|
||||||
import { xbLog } from '../../../core/debug-core.js';
|
import { xbLog } from '../../../../core/debug-core.js';
|
||||||
import { getContext } from '../../../../../../extensions.js';
|
import { getContext } from '../../../../../../../extensions.js';
|
||||||
import { getSummaryStore, getFacts, getNewCharacters, isRelationFact } from '../data/store.js';
|
import { getSummaryStore, getFacts, getNewCharacters, isRelationFact } from '../../data/store.js';
|
||||||
import { filterText } from './text-filter.js';
|
import { filterText } from '../utils/text-filter.js';
|
||||||
import {
|
import {
|
||||||
searchStateAtoms,
|
searchStateAtoms,
|
||||||
buildL0FloorBonus,
|
buildL0FloorBonus,
|
||||||
stateToVirtualChunks,
|
stateToVirtualChunks,
|
||||||
mergeAndSparsify,
|
mergeAndSparsify,
|
||||||
} from './state-recall.js';
|
} from '../pipeline/state-recall.js';
|
||||||
import { ensureEventTextIndex, searchEventsByText } from './text-search.js';
|
import { ensureEventTextIndex, searchEventsByText, ensureChunkTextIndex, searchChunksByText } from './text-search.js';
|
||||||
import {
|
import {
|
||||||
extractRareTerms,
|
extractRareTerms,
|
||||||
extractNounsFromFactsO,
|
extractNounsFromFactsO,
|
||||||
@@ -29,10 +29,8 @@ import {
|
|||||||
const MODULE_ID = 'recall';
|
const MODULE_ID = 'recall';
|
||||||
|
|
||||||
const CONFIG = {
|
const CONFIG = {
|
||||||
QUERY_MSG_COUNT: 5,
|
QUERY_MSG_COUNT: 3,
|
||||||
QUERY_DECAY_BETA: 0.7,
|
QUERY_DECAY_BETA: 0.6,
|
||||||
QUERY_MAX_CHARS: 600,
|
|
||||||
QUERY_CONTEXT_CHARS: 240,
|
|
||||||
|
|
||||||
CAUSAL_CHAIN_MAX_DEPTH: 10,
|
CAUSAL_CHAIN_MAX_DEPTH: 10,
|
||||||
CAUSAL_INJECT_MAX: 30,
|
CAUSAL_INJECT_MAX: 30,
|
||||||
@@ -216,11 +214,26 @@ function extractRelationTarget(p) {
|
|||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildExpDecayWeights(n, beta) {
|
function buildContentAwareWeights(segments, beta = 0.6) {
|
||||||
|
const n = segments.length;
|
||||||
|
if (n === 0) return [];
|
||||||
|
if (n === 1) return [1.0];
|
||||||
|
|
||||||
const last = n - 1;
|
const last = n - 1;
|
||||||
const w = Array.from({ length: n }, (_, i) => Math.exp(beta * (i - last)));
|
const SHORT_THRESHOLD = 15;
|
||||||
const sum = w.reduce((a, b) => a + b, 0) || 1;
|
const raw = [];
|
||||||
return w.map(x => x / sum);
|
|
||||||
|
for (let i = 0; i < n; i++) {
|
||||||
|
const posWeight = Math.exp(beta * (i - last));
|
||||||
|
const len = String(segments[i] || '').replace(/\s+/g, '').length;
|
||||||
|
const contentFactor = len >= SHORT_THRESHOLD
|
||||||
|
? 1.0
|
||||||
|
: Math.max(0.3, Math.sqrt(len / SHORT_THRESHOLD));
|
||||||
|
raw.push(posWeight * contentFactor);
|
||||||
|
}
|
||||||
|
|
||||||
|
const sum = raw.reduce((a, b) => a + b, 0) || 1;
|
||||||
|
return raw.map(w => w / sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
@@ -247,19 +260,16 @@ function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = nul
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return messages.slice(-count).map((m, idx, arr) => {
|
return messages.slice(-count)
|
||||||
const speaker = m.name || (m.is_user ? (name1 || "用户") : "角色");
|
.map((m) => cleanForRecall(m.mes) || '')
|
||||||
const clean = cleanForRecall(m.mes);
|
.filter(Boolean);
|
||||||
if (!clean) return '';
|
|
||||||
const limit = idx === arr.length - 1 ? CONFIG.QUERY_MAX_CHARS : CONFIG.QUERY_CONTEXT_CHARS;
|
|
||||||
return `${speaker}: ${clean.slice(0, limit)}`;
|
|
||||||
}).filter(Boolean);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function embedWeightedQuery(segments, vectorConfig) {
|
async function embedWeightedQuery(segments, vectorConfig) {
|
||||||
if (!segments?.length) return null;
|
if (!segments?.length) return null;
|
||||||
|
|
||||||
const weights = buildExpDecayWeights(segments.length, CONFIG.QUERY_DECAY_BETA);
|
const weights = buildContentAwareWeights(segments, CONFIG.QUERY_DECAY_BETA);
|
||||||
|
|
||||||
const vecs = await embed(segments, vectorConfig);
|
const vecs = await embed(segments, vectorConfig);
|
||||||
const dims = vecs?.[0]?.length || 0;
|
const dims = vecs?.[0]?.length || 0;
|
||||||
if (!dims) return null;
|
if (!dims) return null;
|
||||||
@@ -377,19 +387,6 @@ function expandByFacts(presentEntities, facts, maxDepth = 2) {
|
|||||||
// 实体权重归一化(用于加分分配)
|
// 实体权重归一化(用于加分分配)
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
function normalizeEntityWeights(queryEntityWeights) {
|
|
||||||
if (!queryEntityWeights?.size) return new Map();
|
|
||||||
|
|
||||||
const total = Array.from(queryEntityWeights.values()).reduce((a, b) => a + b, 0);
|
|
||||||
if (total <= 0) return new Map();
|
|
||||||
|
|
||||||
const normalized = new Map();
|
|
||||||
for (const [entity, weight] of queryEntityWeights) {
|
|
||||||
normalized.set(entity, weight / total);
|
|
||||||
}
|
|
||||||
return normalized;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// 文本路 Query 构建(分层高信号词)
|
// 文本路 Query 构建(分层高信号词)
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
@@ -548,7 +545,167 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
|
|||||||
// L1 Chunks 检索
|
// L1 Chunks 检索
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1) {
|
async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntitySet, l0FloorBonus = new Map()) {
|
||||||
|
const { chatId } = getContext();
|
||||||
|
if (!chatId || !queryVector?.length) return [];
|
||||||
|
|
||||||
|
const meta = await getMeta(chatId);
|
||||||
|
const fp = getEngineFingerprint(vectorConfig);
|
||||||
|
if (meta.fingerprint && meta.fingerprint !== fp) return [];
|
||||||
|
|
||||||
|
const eventVectors = await getAllEventVectors(chatId);
|
||||||
|
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
|
||||||
|
if (!vectorMap.size) return [];
|
||||||
|
|
||||||
|
// 构建/更新文本索引
|
||||||
|
const revision = `${chatId}:${store?.updatedAt || 0}:${allEvents.length}`;
|
||||||
|
ensureEventTextIndex(allEvents, revision);
|
||||||
|
|
||||||
|
// 文本路检索
|
||||||
|
const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
|
||||||
|
const textGapInfo = textRanked._gapInfo || null;
|
||||||
|
|
||||||
|
// 向量路检索
|
||||||
|
const scored = (allEvents || []).map((event, idx) => {
|
||||||
|
const v = vectorMap.get(event.id);
|
||||||
|
const rawSim = v ? cosineSimilarity(queryVector, v) : 0;
|
||||||
|
|
||||||
|
let bonus = 0;
|
||||||
|
|
||||||
|
// L0 加权
|
||||||
|
const range = parseFloorRange(event.summary);
|
||||||
|
if (range) {
|
||||||
|
for (let f = range.start; f <= range.end; f++) {
|
||||||
|
if (l0FloorBonus.has(f)) {
|
||||||
|
bonus += l0FloorBonus.get(f);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const participants = (event.participants || []).map(p => normalize(p));
|
||||||
|
const hasPresent = participants.some(p => queryEntitySet.has(p));
|
||||||
|
|
||||||
|
return {
|
||||||
|
_id: event.id,
|
||||||
|
_idx: idx,
|
||||||
|
event,
|
||||||
|
rawSim,
|
||||||
|
finalScore: rawSim + bonus,
|
||||||
|
vector: v,
|
||||||
|
_hasPresent: hasPresent,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
const rawSimById = new Map(scored.map(s => [s._id, s.rawSim]));
|
||||||
|
const hasPresentById = new Map(scored.map(s => [s._id, s._hasPresent]));
|
||||||
|
|
||||||
|
const preFilterDistribution = {
|
||||||
|
total: scored.length,
|
||||||
|
'0.85+': scored.filter(s => s.finalScore >= 0.85).length,
|
||||||
|
'0.7-0.85': scored.filter(s => s.finalScore >= 0.7 && s.finalScore < 0.85).length,
|
||||||
|
'0.6-0.7': scored.filter(s => s.finalScore >= 0.6 && s.finalScore < 0.7).length,
|
||||||
|
'0.5-0.6': scored.filter(s => s.finalScore >= 0.5 && s.finalScore < 0.6).length,
|
||||||
|
'<0.5': scored.filter(s => s.finalScore < 0.5).length,
|
||||||
|
passThreshold: scored.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT).length,
|
||||||
|
threshold: CONFIG.MIN_SIMILARITY_EVENT,
|
||||||
|
};
|
||||||
|
|
||||||
|
const candidates = scored
|
||||||
|
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
|
||||||
|
.sort((a, b) => b.finalScore - a.finalScore)
|
||||||
|
.slice(0, CONFIG.CANDIDATE_EVENTS);
|
||||||
|
|
||||||
|
const vectorRanked = candidates.map(s => ({
|
||||||
|
event: s.event,
|
||||||
|
similarity: s.finalScore,
|
||||||
|
rawSim: s.rawSim,
|
||||||
|
vector: s.vector,
|
||||||
|
}));
|
||||||
|
|
||||||
|
const eventById = new Map(allEvents.map(e => [e.id, e]));
|
||||||
|
const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);
|
||||||
|
|
||||||
|
// TEXT-only 质量门槛
|
||||||
|
const textOnlyStats = {
|
||||||
|
total: 0,
|
||||||
|
passedSoftCheck: 0,
|
||||||
|
filtered: 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
const filtered = fused.filter(x => {
|
||||||
|
if (x.type !== 'TEXT') return true;
|
||||||
|
|
||||||
|
textOnlyStats.total++;
|
||||||
|
|
||||||
|
const sim = x.rawSim || rawSimById.get(x.id) || 0;
|
||||||
|
if (sim >= CONFIG.TEXT_SOFT_MIN_SIM) {
|
||||||
|
textOnlyStats.passedSoftCheck++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
textOnlyStats.filtered++;
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
|
||||||
|
...x,
|
||||||
|
_id: x.id,
|
||||||
|
}));
|
||||||
|
|
||||||
|
const mmrOutput = mmrSelect(
|
||||||
|
mmrInput,
|
||||||
|
CONFIG.MAX_EVENTS,
|
||||||
|
CONFIG.MMR_LAMBDA,
|
||||||
|
c => c.vector || null,
|
||||||
|
c => c.rrf
|
||||||
|
);
|
||||||
|
|
||||||
|
// TEXT-only 限额(MMR 后执行)
|
||||||
|
let textOnlyCount = 0;
|
||||||
|
let textOnlyTruncated = 0;
|
||||||
|
|
||||||
|
const finalResults = mmrOutput.filter(x => {
|
||||||
|
if (x.type !== 'TEXT') return true;
|
||||||
|
|
||||||
|
if (textOnlyCount < CONFIG.TEXT_TOTAL_MAX) {
|
||||||
|
textOnlyCount++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
textOnlyTruncated++;
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
|
||||||
|
textOnlyStats.finalIncluded = textOnlyCount;
|
||||||
|
textOnlyStats.truncatedByLimit = textOnlyTruncated;
|
||||||
|
|
||||||
|
const results = finalResults.map(x => ({
|
||||||
|
event: x.event,
|
||||||
|
similarity: x.rrf,
|
||||||
|
_recallType: hasPresentById.get(x.event?.id) ? 'DIRECT' : 'SIMILAR',
|
||||||
|
_recallReason: x.type,
|
||||||
|
_rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
|
||||||
|
_rawSim: rawSimById.get(x.event?.id) || 0,
|
||||||
|
}));
|
||||||
|
|
||||||
|
if (results.length > 0) {
|
||||||
|
results[0]._preFilterDistribution = preFilterDistribution;
|
||||||
|
results[0]._rrfStats = {
|
||||||
|
vectorCount: vectorRanked.length,
|
||||||
|
textCount: textRanked.length,
|
||||||
|
hybridCount: fused.filter(x => x.type === 'HYBRID').length,
|
||||||
|
vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
|
||||||
|
textOnlyTotal: textOnlyStats.total,
|
||||||
|
};
|
||||||
|
results[0]._textOnlyStats = textOnlyStats;
|
||||||
|
results[0]._textGapInfo = textGapInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1, textSearchParams = null) {
|
||||||
const { chatId } = getContext();
|
const { chatId } = getContext();
|
||||||
if (!chatId || !queryVector?.length) return [];
|
if (!chatId || !queryVector?.length) return [];
|
||||||
|
|
||||||
@@ -577,6 +734,58 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
|
|||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// 文本路补充(仅待整理区)
|
||||||
|
let textL1Stats = null;
|
||||||
|
const store = getSummaryStore();
|
||||||
|
const keepVisible = store?.keepVisibleCount ?? 3;
|
||||||
|
const recentStart = lastSummarizedFloor + 1;
|
||||||
|
const recentEnd = (meta?.lastChunkFloor ?? -1) - keepVisible;
|
||||||
|
|
||||||
|
if (textSearchParams && recentEnd >= recentStart && recentEnd >= 0) {
|
||||||
|
const { queryEntities, rareTerms } = textSearchParams;
|
||||||
|
const textQuery = [...(queryEntities || []), ...(rareTerms || [])].join(' ');
|
||||||
|
|
||||||
|
if (textQuery.trim()) {
|
||||||
|
const allChunks = await getAllChunks(chatId);
|
||||||
|
const recentChunks = allChunks.filter(c => c.floor >= recentStart && c.floor <= recentEnd);
|
||||||
|
|
||||||
|
if (recentChunks.length > 0) {
|
||||||
|
const revision = `${chatId}:chunk:${recentEnd}`;
|
||||||
|
ensureChunkTextIndex(recentChunks, revision);
|
||||||
|
|
||||||
|
const textHits = searchChunksByText(textQuery, recentStart, recentEnd, 20);
|
||||||
|
|
||||||
|
textL1Stats = {
|
||||||
|
range: `${recentStart + 1}~${recentEnd + 1}`,
|
||||||
|
candidates: recentChunks.length,
|
||||||
|
hits: textHits.length,
|
||||||
|
};
|
||||||
|
|
||||||
|
for (const hit of textHits) {
|
||||||
|
const existingIdx = scored.findIndex(s => s.chunkId === hit.chunkId);
|
||||||
|
|
||||||
|
if (existingIdx >= 0) {
|
||||||
|
scored[existingIdx]._hasTextHit = true;
|
||||||
|
scored[existingIdx]._textRank = hit.textRank;
|
||||||
|
} else {
|
||||||
|
scored.push({
|
||||||
|
_id: hit.chunkId,
|
||||||
|
chunkId: hit.chunkId,
|
||||||
|
floor: hit.floor,
|
||||||
|
chunkIdx: 0,
|
||||||
|
similarity: CONFIG.MIN_SIMILARITY_CHUNK_RECENT,
|
||||||
|
_baseSimilarity: 0,
|
||||||
|
_l0Bonus: 0,
|
||||||
|
_recallReason: 'TEXT_L1',
|
||||||
|
_textRank: hit.textRank,
|
||||||
|
vector: null,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const candidates = scored
|
const candidates = scored
|
||||||
.filter(s => {
|
.filter(s => {
|
||||||
const threshold = s.floor > lastSummarizedFloor
|
const threshold = s.floor > lastSummarizedFloor
|
||||||
@@ -599,6 +808,7 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
|
|||||||
'0.55-0.6': scored.filter(s => s.similarity >= 0.55 && s.similarity < 0.6).length,
|
'0.55-0.6': scored.filter(s => s.similarity >= 0.55 && s.similarity < 0.6).length,
|
||||||
'<0.55': scored.filter(s => s.similarity < 0.55).length,
|
'<0.55': scored.filter(s => s.similarity < 0.55).length,
|
||||||
},
|
},
|
||||||
|
textL1: textL1Stats,
|
||||||
};
|
};
|
||||||
|
|
||||||
const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length);
|
const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length);
|
||||||
@@ -636,6 +846,8 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
|
|||||||
isUser: chunk.isUser,
|
isUser: chunk.isUser,
|
||||||
text: chunk.text,
|
text: chunk.text,
|
||||||
similarity: item.similarity,
|
similarity: item.similarity,
|
||||||
|
_recallReason: item._recallReason,
|
||||||
|
_textRank: item._textRank,
|
||||||
};
|
};
|
||||||
}).filter(Boolean);
|
}).filter(Boolean);
|
||||||
|
|
||||||
@@ -646,184 +858,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
|
||||||
// L2 Events 检索(RRF 混合 + MMR 后置)
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
|
||||||
|
|
||||||
async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, normalizedEntityWeights, l0FloorBonus = new Map()) {
|
|
||||||
const { chatId } = getContext();
|
|
||||||
if (!chatId || !queryVector?.length) return [];
|
|
||||||
|
|
||||||
const meta = await getMeta(chatId);
|
|
||||||
const fp = getEngineFingerprint(vectorConfig);
|
|
||||||
if (meta.fingerprint && meta.fingerprint !== fp) return [];
|
|
||||||
|
|
||||||
const eventVectors = await getAllEventVectors(chatId);
|
|
||||||
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
|
|
||||||
if (!vectorMap.size) return [];
|
|
||||||
|
|
||||||
// 构建/更新文本索引
|
|
||||||
const revision = `${chatId}:${store?.updatedAt || 0}:${allEvents.length}`;
|
|
||||||
ensureEventTextIndex(allEvents, revision);
|
|
||||||
|
|
||||||
// 文本路检索
|
|
||||||
const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
|
|
||||||
const textGapInfo = textRanked._gapInfo || null;
|
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════
|
|
||||||
// 向量路检索(只保留 L0 加权)
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════
|
|
||||||
|
|
||||||
const ENTITY_BONUS_POOL = 0.10;
|
|
||||||
|
|
||||||
const scored = (allEvents || []).map((event, idx) => {
|
|
||||||
const v = vectorMap.get(event.id);
|
|
||||||
const rawSim = v ? cosineSimilarity(queryVector, v) : 0;
|
|
||||||
|
|
||||||
let bonus = 0;
|
|
||||||
|
|
||||||
// L0 加权
|
|
||||||
const range = parseFloorRange(event.summary);
|
|
||||||
if (range) {
|
|
||||||
for (let f = range.start; f <= range.end; f++) {
|
|
||||||
if (l0FloorBonus.has(f)) {
|
|
||||||
bonus += l0FloorBonus.get(f);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const participants = (event.participants || []).map(p => normalize(p));
|
|
||||||
let maxEntityWeight = 0;
|
|
||||||
for (const p of participants) {
|
|
||||||
const w = normalizedEntityWeights.get(p) || 0;
|
|
||||||
if (w > maxEntityWeight) {
|
|
||||||
maxEntityWeight = w;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const entityBonus = ENTITY_BONUS_POOL * maxEntityWeight;
|
|
||||||
bonus += entityBonus;
|
|
||||||
|
|
||||||
return {
|
|
||||||
_id: event.id,
|
|
||||||
_idx: idx,
|
|
||||||
event,
|
|
||||||
rawSim,
|
|
||||||
finalScore: rawSim + bonus,
|
|
||||||
vector: v,
|
|
||||||
_entityBonus: entityBonus,
|
|
||||||
_hasPresent: maxEntityWeight > 0,
|
|
||||||
};
|
|
||||||
});
|
|
||||||
|
|
||||||
const rawSimById = new Map(scored.map(s => [s._id, s.rawSim]));
|
|
||||||
const entityBonusById = new Map(scored.map(s => [s._id, s._entityBonus]));
|
|
||||||
const hasPresentById = new Map(scored.map(s => [s._id, s._hasPresent]));
|
|
||||||
|
|
||||||
const preFilterDistribution = {
|
|
||||||
total: scored.length,
|
|
||||||
'0.85+': scored.filter(s => s.finalScore >= 0.85).length,
|
|
||||||
'0.7-0.85': scored.filter(s => s.finalScore >= 0.7 && s.finalScore < 0.85).length,
|
|
||||||
'0.6-0.7': scored.filter(s => s.finalScore >= 0.6 && s.finalScore < 0.7).length,
|
|
||||||
'0.5-0.6': scored.filter(s => s.finalScore >= 0.5 && s.finalScore < 0.6).length,
|
|
||||||
'<0.5': scored.filter(s => s.finalScore < 0.5).length,
|
|
||||||
passThreshold: scored.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT).length,
|
|
||||||
threshold: CONFIG.MIN_SIMILARITY_EVENT,
|
|
||||||
};
|
|
||||||
|
|
||||||
const candidates = scored
|
|
||||||
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
|
|
||||||
.sort((a, b) => b.finalScore - a.finalScore)
|
|
||||||
.slice(0, CONFIG.CANDIDATE_EVENTS);
|
|
||||||
|
|
||||||
const vectorRanked = candidates.map(s => ({
|
|
||||||
event: s.event,
|
|
||||||
similarity: s.finalScore,
|
|
||||||
rawSim: s.rawSim,
|
|
||||||
vector: s.vector,
|
|
||||||
}));
|
|
||||||
|
|
||||||
const eventById = new Map(allEvents.map(e => [e.id, e]));
|
|
||||||
const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);
|
|
||||||
|
|
||||||
const textOnlyStats = {
|
|
||||||
total: 0,
|
|
||||||
passedSoftCheck: 0,
|
|
||||||
filtered: 0,
|
|
||||||
finalIncluded: 0,
|
|
||||||
truncatedByLimit: 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
const filtered = fused.filter(x => {
|
|
||||||
if (x.type !== 'TEXT') return true;
|
|
||||||
|
|
||||||
textOnlyStats.total++;
|
|
||||||
const sim = x.rawSim || rawSimById.get(x.id) || 0;
|
|
||||||
if (sim >= CONFIG.TEXT_SOFT_MIN_SIM) {
|
|
||||||
textOnlyStats.passedSoftCheck++;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
textOnlyStats.filtered++;
|
|
||||||
return false;
|
|
||||||
});
|
|
||||||
|
|
||||||
const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
|
|
||||||
...x,
|
|
||||||
_id: x.id,
|
|
||||||
}));
|
|
||||||
|
|
||||||
const mmrOutput = mmrSelect(
|
|
||||||
mmrInput,
|
|
||||||
CONFIG.MAX_EVENTS,
|
|
||||||
CONFIG.MMR_LAMBDA,
|
|
||||||
c => c.vector || null,
|
|
||||||
c => c.rrf
|
|
||||||
);
|
|
||||||
|
|
||||||
let textOnlyCount = 0;
|
|
||||||
const finalResults = mmrOutput.filter(x => {
|
|
||||||
if (x.type !== 'TEXT') return true;
|
|
||||||
if (textOnlyCount < CONFIG.TEXT_TOTAL_MAX) {
|
|
||||||
textOnlyCount++;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
textOnlyStats.truncatedByLimit++;
|
|
||||||
return false;
|
|
||||||
});
|
|
||||||
textOnlyStats.finalIncluded = textOnlyCount;
|
|
||||||
|
|
||||||
const results = finalResults.map(x => ({
|
|
||||||
event: x.event,
|
|
||||||
similarity: x.rrf,
|
|
||||||
_recallType: hasPresentById.get(x.event?.id) ? 'DIRECT' : 'SIMILAR',
|
|
||||||
_recallReason: x.type,
|
|
||||||
_rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
|
|
||||||
_entityBonus: entityBonusById.get(x.event?.id) || 0,
|
|
||||||
_rawSim: rawSimById.get(x.event?.id) || 0,
|
|
||||||
}));
|
|
||||||
|
|
||||||
// 统计信息附加到第一条结果
|
|
||||||
if (results.length > 0) {
|
|
||||||
results[0]._preFilterDistribution = preFilterDistribution;
|
|
||||||
results[0]._rrfStats = {
|
|
||||||
vectorCount: vectorRanked.length,
|
|
||||||
textCount: textRanked.length,
|
|
||||||
hybridCount: fused.filter(x => x.type === 'HYBRID').length,
|
|
||||||
vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
|
|
||||||
textOnlyTotal: textOnlyStats.total,
|
|
||||||
};
|
|
||||||
results[0]._textOnlyStats = textOnlyStats;
|
|
||||||
results[0]._textGapInfo = textGapInfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
return results;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
|
||||||
// 日志
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
|
||||||
|
|
||||||
function formatRecallLog({
|
function formatRecallLog({
|
||||||
elapsed,
|
elapsed,
|
||||||
segments,
|
segments,
|
||||||
@@ -831,7 +865,7 @@ function formatRecallLog({
|
|||||||
chunkResults,
|
chunkResults,
|
||||||
eventResults,
|
eventResults,
|
||||||
allEvents,
|
allEvents,
|
||||||
normalizedEntityWeights = new Map(),
|
queryEntities = [],
|
||||||
causalEvents = [],
|
causalEvents = [],
|
||||||
chunkPreFilterStats = null,
|
chunkPreFilterStats = null,
|
||||||
l0Results = [],
|
l0Results = [],
|
||||||
@@ -840,15 +874,15 @@ function formatRecallLog({
|
|||||||
textQueryBreakdown = null,
|
textQueryBreakdown = null,
|
||||||
}) {
|
}) {
|
||||||
const lines = [
|
const lines = [
|
||||||
'\u2554' + '\u2550'.repeat(62) + '\u2557',
|
'╔' + '═'.repeat(62) + '╗',
|
||||||
'\u2551 记忆召回报告 \u2551',
|
'║ 记忆召回报告 ║',
|
||||||
'\u2560' + '\u2550'.repeat(62) + '\u2563',
|
'╠' + '═'.repeat(62) + '╣',
|
||||||
`\u2551 耗时: ${elapsed}ms`,
|
`║ 耗时: ${elapsed}ms`,
|
||||||
'\u255a' + '\u2550'.repeat(62) + '\u255d',
|
'╚' + '═'.repeat(62) + '╝',
|
||||||
'',
|
'',
|
||||||
'\u250c' + '\u2500'.repeat(61) + '\u2510',
|
'┌' + '─'.repeat(61) + '┐',
|
||||||
'\u2502 【查询构建】最近 5 条消息,指数衰减加权 (β=0.7) \u2502',
|
`│ 【查询构建】最近 ${CONFIG.QUERY_MSG_COUNT} 条,内容感知加权 (β=${CONFIG.QUERY_DECAY_BETA}) │`,
|
||||||
'\u2514' + '\u2500'.repeat(61) + '\u2518',
|
'└' + '─'.repeat(61) + '┘',
|
||||||
];
|
];
|
||||||
|
|
||||||
const segmentsSorted = segments.map((s, i) => ({
|
const segmentsSorted = segments.map((s, i) => ({
|
||||||
@@ -858,25 +892,19 @@ function formatRecallLog({
|
|||||||
})).sort((a, b) => b.weight - a.weight);
|
})).sort((a, b) => b.weight - a.weight);
|
||||||
|
|
||||||
segmentsSorted.forEach((s, rank) => {
|
segmentsSorted.forEach((s, rank) => {
|
||||||
const bar = '\u2588'.repeat(Math.round(s.weight * 20));
|
const bar = '█'.repeat(Math.round(s.weight * 20));
|
||||||
const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text;
|
const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text;
|
||||||
const marker = rank === 0 ? ' ◀ 主导' : '';
|
const marker = rank === 0 ? ' ◀ 主导' : '';
|
||||||
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
|
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
|
||||||
});
|
});
|
||||||
|
|
||||||
lines.push('');
|
lines.push('');
|
||||||
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
|
lines.push('┌' + '─'.repeat(61) + '┐');
|
||||||
lines.push('\u2502 【提取实体】 \u2502');
|
lines.push('│ 【提取实体】 │');
|
||||||
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
|
lines.push('└' + '─'.repeat(61) + '┘');
|
||||||
|
|
||||||
if (normalizedEntityWeights?.size) {
|
if (queryEntities?.length) {
|
||||||
const sorted = Array.from(normalizedEntityWeights.entries())
|
lines.push(` 焦点: ${queryEntities.slice(0, 8).join('、')}${queryEntities.length > 8 ? ' ...' : ''}`);
|
||||||
.sort((a, b) => b[1] - a[1])
|
|
||||||
.slice(0, 8);
|
|
||||||
const formatted = sorted
|
|
||||||
.map(([e, w]) => `${e}(${(w * 100).toFixed(0)}%)`)
|
|
||||||
.join(' | ');
|
|
||||||
lines.push(` ${formatted}`);
|
|
||||||
} else {
|
} else {
|
||||||
lines.push(' (无)');
|
lines.push(' (无)');
|
||||||
}
|
}
|
||||||
@@ -885,9 +913,9 @@ function formatRecallLog({
|
|||||||
}
|
}
|
||||||
|
|
||||||
lines.push('');
|
lines.push('');
|
||||||
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
|
lines.push('┌' + '─'.repeat(61) + '┐');
|
||||||
lines.push('\u2502 【文本路 Query 构成】 \u2502');
|
lines.push('│ 【文本路 Query 构成】 │');
|
||||||
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
|
lines.push('└' + '─'.repeat(61) + '┘');
|
||||||
|
|
||||||
if (textQueryBreakdown) {
|
if (textQueryBreakdown) {
|
||||||
const bd = textQueryBreakdown;
|
const bd = textQueryBreakdown;
|
||||||
@@ -919,23 +947,9 @@ function formatRecallLog({
|
|||||||
}
|
}
|
||||||
|
|
||||||
lines.push('');
|
lines.push('');
|
||||||
lines.push(' 实体归一化(用于加分):');
|
lines.push('┌' + '─'.repeat(61) + '┐');
|
||||||
if (normalizedEntityWeights?.size) {
|
lines.push('│ 【召回统计】 │');
|
||||||
const sorted = Array.from(normalizedEntityWeights.entries())
|
lines.push('└' + '─'.repeat(61) + '┘');
|
||||||
.sort((a, b) => b[1] - a[1])
|
|
||||||
.slice(0, 8);
|
|
||||||
const formatted = sorted
|
|
||||||
.map(([e, w]) => `${e}(${(w * 100).toFixed(0)}%)`)
|
|
||||||
.join(' | ');
|
|
||||||
lines.push(` ${formatted}`);
|
|
||||||
} else {
|
|
||||||
lines.push(' (无)');
|
|
||||||
}
|
|
||||||
|
|
||||||
lines.push('');
|
|
||||||
lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
|
|
||||||
lines.push('\u2502 【召回统计】 \u2502');
|
|
||||||
lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
|
|
||||||
|
|
||||||
// L0
|
// L0
|
||||||
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
|
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
|
||||||
@@ -953,6 +967,11 @@ function formatRecallLog({
|
|||||||
const dist = chunkPreFilterStats.distribution || {};
|
const dist = chunkPreFilterStats.distribution || {};
|
||||||
lines.push(` 全量: ${chunkPreFilterStats.total} 条 | 通过阈值(远期≥${chunkPreFilterStats.thresholdRemote}, 待整理≥${chunkPreFilterStats.thresholdRecent}): ${chunkPreFilterStats.passThreshold} 条 | 最终: ${chunkResults.length} 条`);
|
lines.push(` 全量: ${chunkPreFilterStats.total} 条 | 通过阈值(远期≥${chunkPreFilterStats.thresholdRemote}, 待整理≥${chunkPreFilterStats.thresholdRecent}): ${chunkPreFilterStats.passThreshold} 条 | 最终: ${chunkResults.length} 条`);
|
||||||
lines.push(` 匹配度: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0}`);
|
lines.push(` 匹配度: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0}`);
|
||||||
|
|
||||||
|
const textL1 = chunkPreFilterStats.textL1;
|
||||||
|
if (textL1) {
|
||||||
|
lines.push(` 文本路补充(待整理区): 范围 ${textL1.range}楼 | 候选 ${textL1.candidates} 条 | 命中 ${textL1.hits} 条`);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
lines.push(` 选入: ${chunkResults.length} 条`);
|
lines.push(` 选入: ${chunkResults.length} 条`);
|
||||||
}
|
}
|
||||||
@@ -988,9 +1007,6 @@ function formatRecallLog({
|
|||||||
lines.push(` ${i + 1}. [${id}] ${title.padEnd(25)} sim=${sim} tRank=${tRank}`);
|
lines.push(` ${i + 1}. [${id}] ${title.padEnd(25)} sim=${sim} tRank=${tRank}`);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
const entityBoostedEvents = eventResults.filter(e => e._entityBonus > 0).length;
|
|
||||||
lines.push('');
|
|
||||||
lines.push(` 实体加分事件: ${entityBoostedEvents} 条`);
|
|
||||||
|
|
||||||
if (textGapInfo) {
|
if (textGapInfo) {
|
||||||
lines.push('');
|
lines.push('');
|
||||||
@@ -1002,7 +1018,6 @@ function formatRecallLog({
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Causal
|
|
||||||
if (causalEvents.length) {
|
if (causalEvents.length) {
|
||||||
const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0));
|
const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0));
|
||||||
const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0));
|
const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0));
|
||||||
@@ -1012,13 +1027,8 @@ function formatRecallLog({
|
|||||||
}
|
}
|
||||||
|
|
||||||
lines.push('');
|
lines.push('');
|
||||||
return lines.join('\n');
|
return lines.join("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
|
||||||
// 主入口
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
|
||||||
|
|
||||||
export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) {
|
export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) {
|
||||||
const T0 = performance.now();
|
const T0 = performance.now();
|
||||||
const { chat } = getContext();
|
const { chat } = getContext();
|
||||||
@@ -1049,9 +1059,9 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
|||||||
const lexicon = buildEntityLexicon(store, allEvents);
|
const lexicon = buildEntityLexicon(store, allEvents);
|
||||||
const queryEntityWeights = extractEntitiesWithWeights(segments, weights, lexicon);
|
const queryEntityWeights = extractEntitiesWithWeights(segments, weights, lexicon);
|
||||||
const queryEntities = Array.from(queryEntityWeights.keys());
|
const queryEntities = Array.from(queryEntityWeights.keys());
|
||||||
|
const queryEntitySet = new Set(queryEntities.map(normalize));
|
||||||
const facts = getFacts(store);
|
const facts = getFacts(store);
|
||||||
const expandedTerms = expandByFacts(queryEntities, facts, 2);
|
const expandedTerms = expandByFacts(queryEntities, facts, 2);
|
||||||
const normalizedEntityWeights = normalizeEntityWeights(queryEntityWeights);
|
|
||||||
|
|
||||||
let queryTextForSearch = '';
|
let queryTextForSearch = '';
|
||||||
let textQueryBreakdown = null;
|
let textQueryBreakdown = null;
|
||||||
@@ -1079,8 +1089,11 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
|||||||
}
|
}
|
||||||
|
|
||||||
const [chunkResults, eventResults] = await Promise.all([
|
const [chunkResults, eventResults] = await Promise.all([
|
||||||
searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor),
|
searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor, {
|
||||||
searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, normalizedEntityWeights, l0FloorBonus),
|
queryEntities,
|
||||||
|
rareTerms: textQueryBreakdown?.rareTerms || [],
|
||||||
|
}),
|
||||||
|
searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntitySet, l0FloorBonus),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const chunkPreFilterStats = chunkResults._preFilterStats || null;
|
const chunkPreFilterStats = chunkResults._preFilterStats || null;
|
||||||
@@ -1118,7 +1131,7 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
|||||||
chunkResults: mergedChunks,
|
chunkResults: mergedChunks,
|
||||||
eventResults,
|
eventResults,
|
||||||
allEvents,
|
allEvents,
|
||||||
normalizedEntityWeights,
|
queryEntities,
|
||||||
causalEvents: causalEventsTruncated,
|
causalEvents: causalEventsTruncated,
|
||||||
chunkPreFilterStats,
|
chunkPreFilterStats,
|
||||||
l0Results,
|
l0Results,
|
||||||
@@ -1149,3 +1162,8 @@ export function buildQueryText(chat, count = 2, excludeLastAi = false) {
|
|||||||
return `${speaker}: ${text.slice(0, 500)}`;
|
return `${speaker}: ${text.slice(0, 500)}`;
|
||||||
}).filter(Boolean).join('\n');
|
}).filter(Boolean).join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
// text-search.js - 最终版
|
// text-search.js - 最终版
|
||||||
|
|
||||||
import MiniSearch from '../../../libs/minisearch.mjs';
|
import MiniSearch from '../../../../libs/minisearch.mjs';
|
||||||
|
|
||||||
const STOP_WORDS = new Set([
|
const STOP_WORDS = new Set([
|
||||||
'的', '了', '是', '在', '和', '与', '或', '但', '而', '却',
|
'的', '了', '是', '在', '和', '与', '或', '但', '而', '却',
|
||||||
@@ -106,7 +106,7 @@ export function ensureEventTextIndex(events, revision) {
|
|||||||
*
|
*
|
||||||
* 参考:帕累托法则(80/20 法则)在信息检索中的应用
|
* 参考:帕累托法则(80/20 法则)在信息检索中的应用
|
||||||
*/
|
*/
|
||||||
function dynamicTopK(scores, coverage = 0.90, minK = 15, maxK = 80) {
|
export function dynamicTopK(scores, coverage = 0.90, minK = 15, maxK = 80) {
|
||||||
if (!scores.length) return 0;
|
if (!scores.length) return 0;
|
||||||
|
|
||||||
const total = scores.reduce((a, b) => a + b, 0);
|
const total = scores.reduce((a, b) => a + b, 0);
|
||||||
@@ -171,3 +171,67 @@ export function clearEventTextIndex() {
|
|||||||
idx = null;
|
idx = null;
|
||||||
lastRevision = null;
|
lastRevision = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Chunk 文本索引(待整理区 L1 补充)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
let chunkIdx = null;
|
||||||
|
let chunkIdxRevision = null;
|
||||||
|
|
||||||
|
export function ensureChunkTextIndex(chunks, revision) {
|
||||||
|
if (chunkIdx && revision === chunkIdxRevision) return;
|
||||||
|
|
||||||
|
try {
|
||||||
|
chunkIdx = new MiniSearch({
|
||||||
|
fields: ['text'],
|
||||||
|
storeFields: ['chunkId', 'floor'],
|
||||||
|
tokenize,
|
||||||
|
searchOptions: { tokenize },
|
||||||
|
});
|
||||||
|
|
||||||
|
chunkIdx.addAll(chunks.map(c => ({
|
||||||
|
id: c.chunkId,
|
||||||
|
chunkId: c.chunkId,
|
||||||
|
floor: c.floor,
|
||||||
|
text: c.text || '',
|
||||||
|
})));
|
||||||
|
|
||||||
|
chunkIdxRevision = revision;
|
||||||
|
} catch (e) {
|
||||||
|
console.error('[text-search] Chunk index build failed:', e);
|
||||||
|
chunkIdx = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function searchChunksByText(query, floorMin, floorMax, limit = 20) {
|
||||||
|
if (!chunkIdx || !query?.trim()) return [];
|
||||||
|
|
||||||
|
try {
|
||||||
|
const results = chunkIdx.search(query, {
|
||||||
|
fuzzy: false,
|
||||||
|
prefix: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
const filtered = results.filter(r => r.floor >= floorMin && r.floor <= floorMax);
|
||||||
|
if (!filtered.length) return [];
|
||||||
|
|
||||||
|
const scores = filtered.map(r => r.score);
|
||||||
|
const k = dynamicTopK(scores, 0.85, 5, limit);
|
||||||
|
|
||||||
|
return filtered.slice(0, k).map((r, i) => ({
|
||||||
|
chunkId: r.chunkId,
|
||||||
|
floor: r.floor,
|
||||||
|
textRank: i + 1,
|
||||||
|
score: r.score,
|
||||||
|
}));
|
||||||
|
} catch (e) {
|
||||||
|
console.error('[text-search] Chunk search failed:', e);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function clearChunkTextIndex() {
|
||||||
|
chunkIdx = null;
|
||||||
|
chunkIdxRevision = null;
|
||||||
|
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
import { xbLog } from '../../../core/debug-core.js';
|
import { xbLog } from '../../../../core/debug-core.js';
|
||||||
import { extensionFolderPath } from '../../../core/constants.js';
|
import { extensionFolderPath } from '../../../../core/constants.js';
|
||||||
|
|
||||||
const MODULE_ID = 'tokenizer';
|
const MODULE_ID = 'tokenizer';
|
||||||
|
|
||||||
@@ -8,7 +8,7 @@ import {
|
|||||||
chunkVectorsTable,
|
chunkVectorsTable,
|
||||||
eventVectorsTable,
|
eventVectorsTable,
|
||||||
CHUNK_MAX_TOKENS,
|
CHUNK_MAX_TOKENS,
|
||||||
} from '../data/db.js';
|
} from '../../data/db.js';
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
// 工具函数
|
// 工具函数
|
||||||
@@ -4,11 +4,11 @@
|
|||||||
// StateVector 存 IndexedDB(可重建)
|
// StateVector 存 IndexedDB(可重建)
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
import { saveMetadataDebounced } from '../../../../../../extensions.js';
|
import { saveMetadataDebounced } from '../../../../../../../extensions.js';
|
||||||
import { chat_metadata } from '../../../../../../../script.js';
|
import { chat_metadata } from '../../../../../../../../script.js';
|
||||||
import { stateVectorsTable } from '../data/db.js';
|
import { stateVectorsTable } from '../../data/db.js';
|
||||||
import { EXT_ID } from '../../../core/constants.js';
|
import { EXT_ID } from '../../../../core/constants.js';
|
||||||
import { xbLog } from '../../../core/debug-core.js';
|
import { xbLog } from '../../../../core/debug-core.js';
|
||||||
|
|
||||||
const MODULE_ID = 'state-store';
|
const MODULE_ID = 'state-store';
|
||||||
|
|
||||||
@@ -3,9 +3,9 @@
|
|||||||
// 向量数据导入导出(当前 chatId 级别)
|
// 向量数据导入导出(当前 chatId 级别)
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
import { zipSync, unzipSync, strToU8, strFromU8 } from '../../../libs/fflate.mjs';
|
import { zipSync, unzipSync, strToU8, strFromU8 } from '../../../../libs/fflate.mjs';
|
||||||
import { getContext } from '../../../../../../extensions.js';
|
import { getContext } from '../../../../../../../extensions.js';
|
||||||
import { xbLog } from '../../../core/debug-core.js';
|
import { xbLog } from '../../../../core/debug-core.js';
|
||||||
import {
|
import {
|
||||||
getMeta,
|
getMeta,
|
||||||
updateMeta,
|
updateMeta,
|
||||||
@@ -26,8 +26,8 @@ import {
|
|||||||
saveStateVectors,
|
saveStateVectors,
|
||||||
clearStateVectors,
|
clearStateVectors,
|
||||||
} from './state-store.js';
|
} from './state-store.js';
|
||||||
import { getEngineFingerprint } from './embedder.js';
|
import { getEngineFingerprint } from '../utils/embedder.js';
|
||||||
import { getVectorConfig } from '../data/config.js';
|
import { getVectorConfig } from '../../data/config.js';
|
||||||
|
|
||||||
const MODULE_ID = 'vector-io';
|
const MODULE_ID = 'vector-io';
|
||||||
const EXPORT_VERSION = 1;
|
const EXPORT_VERSION = 1;
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
// 统一的向量生成接口(本地模型 / 在线服务)
|
// 统一的向量生成接口(本地模型 / 在线服务)
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
import { xbLog } from '../../../core/debug-core.js';
|
import { xbLog } from '../../../../core/debug-core.js';
|
||||||
|
|
||||||
const MODULE_ID = 'embedding';
|
const MODULE_ID = 'embedding';
|
||||||
|
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
// 跳过用户定义的「起始→结束」区间
|
// 跳过用户定义的「起始→结束」区间
|
||||||
// ═══════════════════════════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
import { getTextFilterRules } from '../data/config.js';
|
import { getTextFilterRules } from '../../data/config.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 转义正则特殊字符
|
* 转义正则特殊字符
|
||||||
Reference in New Issue
Block a user