Update recall metrics and context pairing
This commit is contained in:
@@ -1,10 +1,18 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Story Summary - Prompt Injection (v4 - 统一命名)
|
||||
// Story Summary - Prompt Injection (v5 - Two-Stage: L0 Locate → L1 Evidence)
|
||||
//
|
||||
// 命名规范:
|
||||
// - 存储层用 L0/L1/L2/L3(StateAtom/Chunk/Event/Fact)
|
||||
// - 装配层用语义名称:constraint/event/evidence/arc
|
||||
//
|
||||
// 架构变更(v4 → v5):
|
||||
// - L0 和 L1 不再在同一个池子竞争
|
||||
// - recall.js 返回 {l0Selected[], l1ByFloor: Map} 而非 evidenceChunks[]
|
||||
// - 装配层按 L2→L0→L1 层级组织
|
||||
// - 预算以"L0 + USER top-1 + AI top-1"为原子单元
|
||||
// - 孤立 L1(无对应 L0)丢弃
|
||||
// - 孤立 L0(无对应 L1)保留
|
||||
//
|
||||
// 职责:
|
||||
// - 仅负责"构建注入文本",不负责写入 extension_prompts
|
||||
// - 注入发生在 story-summary.js:GENERATION_STARTED 时写入 extension_prompts
|
||||
@@ -15,7 +23,7 @@ import { xbLog } from "../../../core/debug-core.js";
|
||||
import { getSummaryStore, getFacts, isRelationFact } from "../data/store.js";
|
||||
import { getVectorConfig, getSummaryPanelConfig, getSettings } from "../data/config.js";
|
||||
import { recallMemory } from "../vector/retrieval/recall.js";
|
||||
import { getChunksByFloors, getAllChunkVectors, getAllEventVectors, getMeta } from "../vector/storage/chunk-store.js";
|
||||
import { getMeta } from "../vector/storage/chunk-store.js";
|
||||
|
||||
// Metrics
|
||||
import { formatMetricsLog, detectIssues } from "../vector/retrieval/metrics.js";
|
||||
@@ -79,23 +87,6 @@ function pushWithBudget(lines, text, state) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Compute the cosine similarity of two equal-length numeric vectors.
 *
 * @param {number[]} a - Vector A.
 * @param {number[]} b - Vector B.
 * @returns {number} Similarity clamped to [-1, 1]. Returns 0 when either
 *   vector is missing or empty, when the lengths differ, when either vector
 *   has zero magnitude, or when the computed ratio is not a finite number.
 */
function cosineSimilarity(a, b) {
    if (!a?.length || !b?.length || a.length !== b.length) return 0;

    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }

    // Zero (or NaN) magnitude: the angle is undefined, report "no similarity".
    if (!normA || !normB) return 0;

    const similarity = dot / (Math.sqrt(normA) * Math.sqrt(normB));

    // Infinite components can turn the ratio into NaN (e.g. Infinity * 0).
    if (!Number.isFinite(similarity)) return 0;

    // Floating-point rounding can push the ratio marginally past +/-1.
    return Math.max(-1, Math.min(1, similarity));
}
|
||||
|
||||
/**
|
||||
* 解析事件摘要中的楼层范围
|
||||
* @param {string} summary - 事件摘要
|
||||
@@ -134,46 +125,27 @@ function normalize(s) {
|
||||
.toLowerCase();
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// 上下文配对工具函数
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 获取 chunk 的上下文楼层
|
||||
* @param {object} chunk - chunk 对象
|
||||
* @returns {number} 上下文楼层(-1 表示无)
|
||||
* 获取事件排序键
|
||||
* @param {object} event - 事件对象
|
||||
* @returns {number} 排序键
|
||||
*/
|
||||
function getContextFloor(chunk) {
|
||||
if (chunk.isAnchorVirtual) return -1;
|
||||
return chunk.isUser ? chunk.floor + 1 : chunk.floor - 1;
|
||||
/**
 * Derive the numeric key used to order events.
 *
 * The start of the floor range parsed from the event summary is preferred.
 * Without a range, the number embedded in an `evt-<n>` id is used. Events
 * with neither get Number.MAX_SAFE_INTEGER.
 *
 * @param {object} event - Event object (reads `summary` and `id`).
 * @returns {number} Sort key.
 */
function getEventSortKey(event) {
    const floorRange = parseFloorRange(event?.summary);
    if (floorRange) {
        return floorRange.start;
    }

    const idMatch = /evt-(\d+)/.exec(String(event?.id || ""));
    if (!idMatch) {
        return Number.MAX_SAFE_INTEGER;
    }
    return parseInt(idMatch[1], 10);
}
|
||||
|
||||
/**
|
||||
* 选择上下文 chunk
|
||||
* @param {object[]} candidates - 候选 chunks
|
||||
* @param {object} mainChunk - 主 chunk
|
||||
* @returns {object|null} 选中的上下文 chunk
|
||||
* 重新编号事件文本
|
||||
* @param {string} text - 原始文本
|
||||
* @param {number} newIndex - 新编号
|
||||
* @returns {string} 重新编号后的文本
|
||||
*/
|
||||
/**
 * Pick the chunk to show as context for a main chunk.
 *
 * A candidate from the opposite side (user vs. non-user) is preferred; when
 * none exists the first candidate is returned.
 *
 * @param {object[]} candidates - Candidate chunks.
 * @param {object} mainChunk - Main chunk whose counterpart is wanted.
 * @returns {object|null} Selected context chunk, or null without candidates.
 */
function pickContextChunk(candidates, mainChunk) {
    if (!candidates?.length) return null;

    // Compare by truthiness so a chunk without an explicit `isUser` flag is
    // treated as non-user instead of never matching a strict comparison.
    const wantUser = !mainChunk?.isUser;
    const opposite = candidates.find((c) => Boolean(c?.isUser) === wantUser);

    return opposite ?? candidates[0];
}
|
||||
|
||||
/**
|
||||
* 格式化上下文 chunk 行
|
||||
* @param {object} chunk - chunk 对象
|
||||
* @param {boolean} isAbove - 是否在上方
|
||||
* @returns {string} 格式化后的行
|
||||
*/
|
||||
function formatContextChunkLine(chunk, isAbove) {
|
||||
const { name1, name2 } = getContext();
|
||||
const speaker = chunk.isUser ? (name1 || "用户") : (chunk.speaker || name2 || "角色");
|
||||
const text = String(chunk.text || "").trim();
|
||||
const symbol = isAbove ? "┌" : "└";
|
||||
return ` ${symbol} #${chunk.floor + 1} [${speaker}] ${text}`;
|
||||
/**
 * Replace the leading ordinal of a formatted event text with a new number.
 *
 * Only a number at the very start of the text (after optional whitespace)
 * that is followed by "." is rewritten; text without such a prefix is
 * returned unchanged.
 *
 * @param {string} text - Original text; null/undefined is treated as "".
 * @param {number} newIndex - New ordinal to insert.
 * @returns {string} Text with the leading ordinal replaced.
 */
function renumberEventText(text, newIndex) {
    const s = String(text || "");
    // A replacer function keeps the digits of newIndex from being read as part
    // of a "$nn" group reference and inserts any "$" in newIndex literally.
    return s.replace(
        /^(\s*)\d+(\.\s*(?:【)?)/,
        (_match, lead, tail) => `${lead}${newIndex}${tail}`
    );
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -259,10 +231,8 @@ function filterConstraintsByRelevance(facts, focusEntities, knownCharacters) {
|
||||
const focusSet = new Set((focusEntities || []).map(normalize));
|
||||
|
||||
return facts.filter(f => {
|
||||
// isState 的 facts 始终保留
|
||||
if (f._isState === true) return true;
|
||||
|
||||
// 关系类 facts:检查 from/to 是否在焦点中
|
||||
if (isRelationFact(f)) {
|
||||
const from = normalize(f.s);
|
||||
const target = parseRelationTarget(f.p);
|
||||
@@ -272,7 +242,6 @@ function filterConstraintsByRelevance(facts, focusEntities, knownCharacters) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 其他 facts:检查主体是否在焦点中
|
||||
const subjectNorm = normalize(f.s);
|
||||
if (knownCharacters.has(subjectNorm)) {
|
||||
return focusSet.has(subjectNorm);
|
||||
@@ -326,28 +295,34 @@ function formatArcLine(arc) {
|
||||
}
|
||||
|
||||
/**
|
||||
* 格式化 evidence chunk 完整行
|
||||
* @param {object} chunk - chunk 对象
|
||||
* 格式化 L0 锚点行
|
||||
* @param {object} l0 - L0 对象
|
||||
* @returns {string} 格式化后的行
|
||||
*/
|
||||
function formatEvidenceFullLine(chunk) {
|
||||
/**
 * Format the anchor line for an L0 item.
 *
 * The displayed floor is `l0.floor + 1`. The text comes from `l0.text`,
 * falling back to `l0.atom.semantic` when the text is missing or blank.
 *
 * @param {object} l0 - L0 object (reads `floor`, `text`, `atom.semantic`).
 * @returns {string} Formatted line.
 */
function formatL0Line(l0) {
    // Trim before choosing so a whitespace-only `text` still falls back to
    // the atom's semantic description instead of yielding an empty line.
    const text = String(l0.text || "").trim() || String(l0.atom?.semantic || "").trim();
    return ` › #${l0.floor + 1} [📌] ${text}`;
}
|
||||
|
||||
/**
|
||||
* 格式化 L1 chunk 行(挂在 L0 下方)
|
||||
* @param {object} chunk - L1 chunk 对象
|
||||
* @param {boolean} isContext - 是否为上下文(USER 侧)
|
||||
* @returns {string} 格式化后的行
|
||||
*/
|
||||
function formatL1Line(chunk, isContext) {
|
||||
const { name1, name2 } = getContext();
|
||||
|
||||
if (chunk.isAnchorVirtual) {
|
||||
return `› #${chunk.floor + 1} [📌] ${String(chunk.text || "").trim()}`;
|
||||
}
|
||||
|
||||
const speaker = chunk.isUser ? (name1 || "用户") : (chunk.speaker || name2 || "角色");
|
||||
return `› #${chunk.floor + 1} [${speaker}] ${String(chunk.text || "").trim()}`;
|
||||
const text = String(chunk.text || "").trim();
|
||||
const symbol = isContext ? "┌" : "›";
|
||||
return ` ${symbol} #${chunk.floor + 1} [${speaker}] ${text}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* 格式化因果事件行
|
||||
* @param {object} causalItem - 因果事件项
|
||||
* @param {Map} causalById - 因果事件索引
|
||||
* @returns {string} 格式化后的行
|
||||
*/
|
||||
function formatCausalEventLine(causalItem, causalById) {
|
||||
function formatCausalEventLine(causalItem) {
|
||||
const ev = causalItem?.event || {};
|
||||
const depth = Math.max(1, Math.min(9, causalItem?._causalDepth || 1));
|
||||
const indent = " │" + " ".repeat(depth - 1);
|
||||
@@ -365,128 +340,128 @@ function formatCausalEventLine(causalItem, causalById) {
|
||||
const body = `${summary}${floorHint ? ` ${floorHint}` : ""}`.trim();
|
||||
lines.push(`${indent} ${body}`);
|
||||
|
||||
const evidence = causalItem._evidenceChunk;
|
||||
if (evidence) {
|
||||
const speaker = evidence.speaker || "角色";
|
||||
const text = String(evidence.text || "").trim();
|
||||
lines.push(`${indent} › #${evidence.floor + 1} [${speaker}] ${text}`);
|
||||
}
|
||||
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
/**
 * Replace the leading ordinal of a formatted event text with a new number.
 *
 * Only a number at the very start of the text (after optional whitespace)
 * that is followed by "." is rewritten; text without such a prefix is
 * returned unchanged.
 *
 * @param {string} text - Original text; null/undefined is treated as "".
 * @param {number} newIndex - New ordinal to insert.
 * @returns {string} Text with the leading ordinal replaced.
 */
function renumberEventText(text, newIndex) {
    const s = String(text || "");
    // A replacer function keeps the digits of newIndex from being read as part
    // of a "$nn" group reference and inserts any "$" in newIndex literally.
    return s.replace(
        /^(\s*)\d+(\.\s*(?:【)?)/,
        (_match, lead, tail) => `${lead}${newIndex}${tail}`
    );
}
|
||||
|
||||
/**
 * Derive the numeric key used to order events.
 *
 * The start of the floor range parsed from the event summary is preferred.
 * Without a range, the number embedded in an `evt-<n>` id is used. Events
 * with neither get Number.MAX_SAFE_INTEGER.
 *
 * @param {object} event - Event object (reads `summary` and `id`).
 * @returns {number} Sort key.
 */
function getEventSortKey(event) {
    const floorRange = parseFloorRange(event?.summary);
    if (floorRange) {
        return floorRange.start;
    }

    const idMatch = /evt-(\d+)/.exec(String(event?.id || ""));
    if (!idMatch) {
        return Number.MAX_SAFE_INTEGER;
    }
    return parseInt(idMatch[1], 10);
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// 按楼层分组装配 evidence(修复上下文重复)
|
||||
// L0→L1 证据单元构建
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 按楼层装配 evidence
|
||||
* @param {object[]} evidenceCandidates - 候选 evidence
|
||||
* @param {Map} contextChunksByFloor - 上下文 chunks 索引
|
||||
* @param {object} budget - 预算状态
|
||||
* @returns {{lines: string[], anchorCount: number, contextPairsCount: number}}
|
||||
* @typedef {object} EvidenceUnit
|
||||
* @property {object} l0 - L0 锚点对象
|
||||
* @property {object|null} userL1 - USER 侧 top-1 L1 chunk
|
||||
* @property {object|null} aiL1 - AI 侧 top-1 L1 chunk
|
||||
* @property {number} totalTokens - 整个单元的 token 估算
|
||||
*/
|
||||
function assembleEvidenceByFloor(evidenceCandidates, contextChunksByFloor, budget) {
|
||||
if (!evidenceCandidates?.length) {
|
||||
return { lines: [], anchorCount: 0, contextPairsCount: 0 };
|
||||
}
|
||||
|
||||
// 1. 按楼层分组
|
||||
const byFloor = new Map();
|
||||
for (const c of evidenceCandidates) {
|
||||
const arr = byFloor.get(c.floor) || [];
|
||||
arr.push(c);
|
||||
byFloor.set(c.floor, arr);
|
||||
}
|
||||
/**
|
||||
* 为一个 L0 构建证据单元
|
||||
* @param {object} l0 - L0 对象
|
||||
* @param {Map<number, object>} l1ByFloor - 楼层→L1配对映射
|
||||
* @returns {EvidenceUnit}
|
||||
*/
|
||||
function buildEvidenceUnit(l0, l1ByFloor) {
|
||||
const pair = l1ByFloor.get(l0.floor);
|
||||
const userL1 = pair?.userTop1 || null;
|
||||
const aiL1 = pair?.aiTop1 || null;
|
||||
|
||||
// 2. 楼层内按 chunkIdx 排序
|
||||
for (const [, chunks] of byFloor) {
|
||||
chunks.sort((a, b) => (a.chunkIdx ?? 0) - (b.chunkIdx ?? 0));
|
||||
}
|
||||
// 预计算整个单元的 token 开销
|
||||
let totalTokens = estimateTokens(formatL0Line(l0));
|
||||
if (userL1) totalTokens += estimateTokens(formatL1Line(userL1, true));
|
||||
if (aiL1) totalTokens += estimateTokens(formatL1Line(aiL1, false));
|
||||
|
||||
// 3. 按楼层顺序装配
|
||||
const floorsSorted = Array.from(byFloor.keys()).sort((a, b) => a - b);
|
||||
return { l0, userL1, aiL1, totalTokens };
|
||||
}
|
||||
|
||||
/**
 * Render one evidence unit as text lines.
 *
 * Output order is fixed: the L0 anchor line, then the USER-side L1 line
 * (if present), then the AI-side L1 line (if present).
 *
 * @param {EvidenceUnit} unit - Evidence unit to render.
 * @returns {string[]} Lines in output order.
 */
function formatEvidenceUnit(unit) {
    const lines = [formatL0Line(unit.l0)];

    if (unit.userL1) {
        // `true` marks the USER-side chunk as the context line.
        lines.push(formatL1Line(unit.userL1, true));
    }
    if (unit.aiL1) {
        lines.push(formatL1Line(unit.aiL1, false));
    }

    return lines;
}
|
||||
|
||||
for (const floor of floorsSorted) {
|
||||
const chunks = byFloor.get(floor);
|
||||
if (!chunks?.length) continue;
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// 事件证据收集
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// 分离锚点虚拟 chunks 和真实 chunks
|
||||
const anchorChunks = chunks.filter(c => c.isAnchorVirtual);
|
||||
const realChunks = chunks.filter(c => !c.isAnchorVirtual);
|
||||
/**
|
||||
* 为事件收集范围内的 L0 证据单元
|
||||
* @param {object} eventObj - 事件对象
|
||||
* @param {object[]} l0Selected - 所有选中的 L0
|
||||
* @param {Map<number, object>} l1ByFloor - 楼层→L1配对映射
|
||||
* @param {Set<string>} usedL0Ids - 已消费的 L0 ID 集合(会被修改)
|
||||
* @returns {EvidenceUnit[]} 该事件的证据单元列表
|
||||
*/
|
||||
function collectEvidenceForEvent(eventObj, l0Selected, l1ByFloor, usedL0Ids) {
|
||||
const range = parseFloorRange(eventObj?.summary);
|
||||
if (!range) return [];
|
||||
|
||||
// 锚点直接输出(不需要上下文)
|
||||
for (const c of anchorChunks) {
|
||||
const line = formatEvidenceFullLine(c);
|
||||
if (!pushWithBudget(lines, line, budget)) {
|
||||
return { lines, anchorCount, contextPairsCount };
|
||||
}
|
||||
anchorCount++;
|
||||
}
|
||||
const units = [];
|
||||
|
||||
// 真实 chunks 按楼层统一处理
|
||||
if (realChunks.length > 0) {
|
||||
const firstChunk = realChunks[0];
|
||||
const pairFloor = getContextFloor(firstChunk);
|
||||
const pairCandidates = contextChunksByFloor.get(pairFloor) || [];
|
||||
const contextChunk = pickContextChunk(pairCandidates, firstChunk);
|
||||
for (const l0 of l0Selected) {
|
||||
if (usedL0Ids.has(l0.id)) continue;
|
||||
if (l0.floor < range.start || l0.floor > range.end) continue;
|
||||
|
||||
// 上下文在前
|
||||
if (contextChunk && contextChunk.floor < floor) {
|
||||
const contextLine = formatContextChunkLine(contextChunk, true);
|
||||
if (!pushWithBudget(lines, contextLine, budget)) {
|
||||
return { lines, anchorCount, contextPairsCount };
|
||||
}
|
||||
contextPairsCount++;
|
||||
}
|
||||
|
||||
// 输出该楼层所有真实 chunks
|
||||
for (const c of realChunks) {
|
||||
const line = formatEvidenceFullLine(c);
|
||||
if (!pushWithBudget(lines, line, budget)) {
|
||||
return { lines, anchorCount, contextPairsCount };
|
||||
}
|
||||
}
|
||||
|
||||
// 上下文在后
|
||||
if (contextChunk && contextChunk.floor > floor) {
|
||||
const contextLine = formatContextChunkLine(contextChunk, false);
|
||||
if (!pushWithBudget(lines, contextLine, budget)) {
|
||||
return { lines, anchorCount, contextPairsCount };
|
||||
}
|
||||
contextPairsCount++;
|
||||
}
|
||||
}
|
||||
const unit = buildEvidenceUnit(l0, l1ByFloor);
|
||||
units.push(unit);
|
||||
usedL0Ids.add(l0.id);
|
||||
}
|
||||
|
||||
return { lines, anchorCount, contextPairsCount };
|
||||
// 按楼层排序
|
||||
units.sort((a, b) => a.l0.floor - b.l0.floor);
|
||||
|
||||
return units;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// 事件格式化(L2→L0→L1 层级)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Format one recalled event together with its causal chain and L0→L1 evidence.
 *
 * Line order: header (index, optional time label, display title), the
 * participants line (only when it differs from the display title), the
 * cleaned summary, one entry per resolvable `causedBy` id, then the lines of
 * every evidence unit.
 *
 * @param {object} eventItem - Recalled event item (reads `eventItem.event`).
 * @param {number} idx - Ordinal printed in the header.
 * @param {EvidenceUnit[]} evidenceUnits - Evidence units for this event;
 *   a missing value is treated as an empty list.
 * @param {Map<string, object>} causalById - Causal events keyed by id; may be omitted.
 * @returns {string} Formatted text, lines joined with "\n".
 */
function formatEventWithEvidence(eventItem, idx, evidenceUnits, causalById) {
    const ev = eventItem?.event || {};
    const time = ev.timeLabel || "";
    const title = String(ev.title || "").trim();
    const people = (ev.participants || []).join(" / ").trim();
    const summary = cleanSummary(ev.summary);

    // Title falls back to the participants, then the id, then a generic label.
    const displayTitle = title || people || ev.id || "事件";
    const header = time ? `${idx}.【${time}】${displayTitle}` : `${idx}. ${displayTitle}`;

    const lines = [header];
    // Skip the participants line when it already serves as the title.
    if (people && displayTitle !== people) lines.push(` ${people}`);
    lines.push(` ${summary}`);

    // Causal chain: ids that cannot be resolved are silently skipped.
    for (const cid of ev.causedBy || []) {
        const c = causalById?.get(cid);
        if (c) lines.push(formatCausalEventLine(c));
    }

    // L0→L1 evidence units.
    for (const unit of evidenceUnits ?? []) {
        lines.push(...formatEvidenceUnit(unit));
    }

    return lines.join("\n");
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -581,19 +556,22 @@ export function buildNonVectorPromptText() {
|
||||
* 构建向量模式注入文本
|
||||
* @param {object} store - 存储对象
|
||||
* @param {object} recallResult - 召回结果
|
||||
* @param {Map} causalById - 因果事件索引
|
||||
* @param {Map<string, object>} causalById - 因果事件索引
|
||||
* @param {string[]} focusEntities - 焦点实体
|
||||
* @param {object} meta - 元数据
|
||||
* @param {object} metrics - 指标对象
|
||||
* @returns {Promise<{promptText: string, injectionLogText: string, injectionStats: object, metrics: object}>}
|
||||
* @returns {Promise<{promptText: string, injectionStats: object, metrics: object}>}
|
||||
*/
|
||||
async function buildVectorPrompt(store, recallResult, causalById, focusEntities = [], meta = null, metrics = null) {
|
||||
async function buildVectorPrompt(store, recallResult, causalById, focusEntities, meta, metrics) {
|
||||
const T_Start = performance.now();
|
||||
|
||||
const { chatId } = getContext();
|
||||
const data = store.json || {};
|
||||
const total = { used: 0, max: MAIN_BUDGET_MAX };
|
||||
|
||||
// 从 recallResult 解构
|
||||
const l0Selected = recallResult?.l0Selected || [];
|
||||
const l1ByFloor = recallResult?.l1ByFloor || new Map();
|
||||
|
||||
// 装配结果
|
||||
const assembled = {
|
||||
constraints: { lines: [], tokens: 0 },
|
||||
@@ -610,15 +588,9 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
|
||||
constraint: { count: 0, tokens: 0, filtered: 0 },
|
||||
arc: { count: 0, tokens: 0 },
|
||||
event: { selected: 0, tokens: 0 },
|
||||
evidence: { attached: 0, tokens: 0 },
|
||||
distantEvidence: { injected: 0, tokens: 0, anchorCount: 0, contextPairs: 0 },
|
||||
};
|
||||
|
||||
const recentEvidenceStats = {
|
||||
injected: 0,
|
||||
tokens: 0,
|
||||
floorRange: "N/A",
|
||||
contextPairs: 0,
|
||||
evidence: { l0InEvents: 0, l1InEvents: 0, tokens: 0 },
|
||||
distantEvidence: { units: 0, tokens: 0 },
|
||||
recentEvidence: { units: 0, tokens: 0 },
|
||||
};
|
||||
|
||||
const eventDetails = {
|
||||
@@ -627,6 +599,9 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
|
||||
relatedCount: 0,
|
||||
};
|
||||
|
||||
// 已消费的 L0 ID 集合(事件区域消费后,evidence 区域不再重复)
|
||||
const usedL0Ids = new Set();
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════
|
||||
// [Constraints] L3 Facts → 世界约束
|
||||
// ═══════════════════════════════════════════════════════════════════════
|
||||
@@ -698,70 +673,9 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════
|
||||
// [Events] L2 Events → 直接命中 + 相似命中 + 因果链
|
||||
// [Events] L2 Events → 直接命中 + 相似命中 + 因果链 + L0→L1 证据
|
||||
// ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
const eventHits = (recallResult?.events || []).filter(e => e?.event?.summary);
|
||||
const evidenceChunks = recallResult?.evidenceChunks || [];
|
||||
const usedChunkIds = new Set();
|
||||
|
||||
/**
|
||||
* 为事件选择最佳证据 chunk
|
||||
* @param {object} eventObj - 事件对象
|
||||
* @returns {object|null} 最佳 chunk
|
||||
*/
|
||||
function pickBestEvidenceForEvent(eventObj) {
|
||||
const range = parseFloorRange(eventObj?.summary);
|
||||
if (!range) return null;
|
||||
|
||||
let best = null;
|
||||
for (const c of evidenceChunks) {
|
||||
if (usedChunkIds.has(c.chunkId)) continue;
|
||||
if (c.floor < range.start || c.floor > range.end) continue;
|
||||
|
||||
if (!best) {
|
||||
best = c;
|
||||
} else if (c.isAnchorVirtual && !best.isAnchorVirtual) {
|
||||
best = c;
|
||||
} else if (c.isAnchorVirtual === best.isAnchorVirtual && (c.chunkIdx ?? 0) < (best.chunkIdx ?? 0)) {
|
||||
best = c;
|
||||
}
|
||||
}
|
||||
return best;
|
||||
}
|
||||
|
||||
/**
|
||||
* 格式化事件带证据
|
||||
* @param {object} eventItem - 事件项
|
||||
* @param {number} idx - 编号
|
||||
* @param {object} chunk - 证据 chunk
|
||||
* @returns {string} 格式化后的文本
|
||||
*/
|
||||
function formatEventWithEvidence(eventItem, idx, chunk) {
|
||||
const ev = eventItem.event || {};
|
||||
const time = ev.timeLabel || "";
|
||||
const title = String(ev.title || "").trim();
|
||||
const people = (ev.participants || []).join(" / ").trim();
|
||||
const summary = cleanSummary(ev.summary);
|
||||
|
||||
const displayTitle = title || people || ev.id || "事件";
|
||||
const header = time ? `${idx}.【${time}】${displayTitle}` : `${idx}. ${displayTitle}`;
|
||||
|
||||
const lines = [header];
|
||||
if (people && displayTitle !== people) lines.push(` ${people}`);
|
||||
lines.push(` ${summary}`);
|
||||
|
||||
for (const cid of ev.causedBy || []) {
|
||||
const c = causalById?.get(cid);
|
||||
if (c) lines.push(formatCausalEventLine(c, causalById));
|
||||
}
|
||||
|
||||
if (chunk) {
|
||||
lines.push(` ${formatEvidenceFullLine(chunk)}`);
|
||||
}
|
||||
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
const candidates = [...eventHits].sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
|
||||
|
||||
@@ -775,52 +689,91 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
|
||||
|
||||
const isDirect = e._recallType === "DIRECT";
|
||||
|
||||
const bestChunk = pickBestEvidenceForEvent(e.event);
|
||||
// 收集该事件范围内的 L0→L1 证据单元
|
||||
const evidenceUnits = collectEvidenceForEvent(e.event, l0Selected, l1ByFloor, usedL0Ids);
|
||||
|
||||
let text = formatEventWithEvidence(e, 0, bestChunk);
|
||||
let cost = estimateTokens(text);
|
||||
let hasEvidence = !!bestChunk;
|
||||
let chosenChunk = bestChunk || null;
|
||||
// 格式化事件(含证据)
|
||||
const text = formatEventWithEvidence(e, 0, evidenceUnits, causalById);
|
||||
const cost = estimateTokens(text);
|
||||
|
||||
// 预算检查:整个事件(含证据)作为原子单元
|
||||
if (total.used + cost > total.max) {
|
||||
text = formatEventWithEvidence(e, 0, null);
|
||||
cost = estimateTokens(text);
|
||||
hasEvidence = false;
|
||||
chosenChunk = null;
|
||||
// 尝试不带证据的版本
|
||||
const textNoEvidence = formatEventWithEvidence(e, 0, [], causalById);
|
||||
const costNoEvidence = estimateTokens(textNoEvidence);
|
||||
|
||||
if (total.used + cost > total.max) {
|
||||
if (total.used + costNoEvidence > total.max) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 放入不带证据的版本,归还已消费的 L0 ID
|
||||
for (const unit of evidenceUnits) {
|
||||
usedL0Ids.delete(unit.l0.id);
|
||||
}
|
||||
|
||||
if (isDirect) {
|
||||
selectedDirect.push({
|
||||
event: e.event, text: textNoEvidence, tokens: costNoEvidence,
|
||||
evidenceUnits: [], candidateRank,
|
||||
});
|
||||
} else {
|
||||
selectedRelated.push({
|
||||
event: e.event, text: textNoEvidence, tokens: costNoEvidence,
|
||||
evidenceUnits: [], candidateRank,
|
||||
});
|
||||
}
|
||||
|
||||
injectionStats.event.selected++;
|
||||
injectionStats.event.tokens += costNoEvidence;
|
||||
total.used += costNoEvidence;
|
||||
|
||||
eventDetails.list.push({
|
||||
title: e.event?.title || e.event?.id,
|
||||
isDirect,
|
||||
hasEvidence: false,
|
||||
tokens: costNoEvidence,
|
||||
similarity: e.similarity || 0,
|
||||
l0Count: 0,
|
||||
l1Count: 0,
|
||||
});
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// 预算充足,放入完整版本
|
||||
const l0Count = evidenceUnits.length;
|
||||
let l1Count = 0;
|
||||
for (const unit of evidenceUnits) {
|
||||
if (unit.userL1) l1Count++;
|
||||
if (unit.aiL1) l1Count++;
|
||||
}
|
||||
|
||||
if (isDirect) {
|
||||
selectedDirect.push({ event: e.event, text, tokens: cost, chunk: chosenChunk, hasEvidence, candidateRank });
|
||||
selectedDirect.push({
|
||||
event: e.event, text, tokens: cost,
|
||||
evidenceUnits, candidateRank,
|
||||
});
|
||||
} else {
|
||||
selectedRelated.push({ event: e.event, text, tokens: cost, chunk: chosenChunk, hasEvidence, candidateRank });
|
||||
selectedRelated.push({
|
||||
event: e.event, text, tokens: cost,
|
||||
evidenceUnits, candidateRank,
|
||||
});
|
||||
}
|
||||
|
||||
injectionStats.event.selected++;
|
||||
injectionStats.event.tokens += cost;
|
||||
injectionStats.evidence.l0InEvents += l0Count;
|
||||
injectionStats.evidence.l1InEvents += l1Count;
|
||||
total.used += cost;
|
||||
|
||||
if (hasEvidence && bestChunk) {
|
||||
const chunkLine = formatEvidenceFullLine(bestChunk);
|
||||
const ct = estimateTokens(chunkLine);
|
||||
injectionStats.evidence.attached++;
|
||||
injectionStats.evidence.tokens += ct;
|
||||
usedChunkIds.add(bestChunk.chunkId);
|
||||
|
||||
injectionStats.event.tokens += Math.max(0, cost - ct);
|
||||
} else {
|
||||
injectionStats.event.tokens += cost;
|
||||
}
|
||||
|
||||
eventDetails.list.push({
|
||||
title: e.event?.title || e.event?.id,
|
||||
isDirect,
|
||||
hasEvidence,
|
||||
hasEvidence: l0Count > 0,
|
||||
tokens: cost,
|
||||
similarity: e.similarity || 0,
|
||||
hasAnchorEvidence: bestChunk?.isAnchorVirtual || false,
|
||||
l0Count,
|
||||
l1Count,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -845,110 +798,81 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
|
||||
assembled.relatedEvents.lines = relatedEventTexts;
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════
|
||||
// [Evidence - Distant] L1 Chunks → 远期证据(已总结范围)
|
||||
// [Evidence - Distant] 远期证据(已总结范围,未被事件消费的 L0→L1)
|
||||
// ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
const lastSummarized = store.lastSummarizedMesId ?? -1;
|
||||
const lastChunkFloor = meta?.lastChunkFloor ?? -1;
|
||||
const keepVisible = store.keepVisibleCount ?? 3;
|
||||
|
||||
const distantContextFloors = new Set();
|
||||
const distantCandidates = evidenceChunks
|
||||
.filter(c => !usedChunkIds.has(c.chunkId))
|
||||
.filter(c => c.floor <= lastSummarized);
|
||||
// 收集未被事件消费的 L0,按 rerankScore 降序
|
||||
const remainingL0 = l0Selected
|
||||
.filter(l0 => !usedL0Ids.has(l0.id))
|
||||
.sort((a, b) => (b.rerankScore || 0) - (a.rerankScore || 0));
|
||||
|
||||
for (const c of distantCandidates) {
|
||||
if (c.isAnchorVirtual) continue;
|
||||
const pairFloor = getContextFloor(c);
|
||||
if (pairFloor >= 0) distantContextFloors.add(pairFloor);
|
||||
}
|
||||
// 远期:floor <= lastSummarized
|
||||
const distantL0 = remainingL0.filter(l0 => l0.floor <= lastSummarized);
|
||||
|
||||
let contextChunksByFloor = new Map();
|
||||
if (chatId && distantContextFloors.size > 0) {
|
||||
try {
|
||||
const contextChunks = await getChunksByFloors(chatId, Array.from(distantContextFloors));
|
||||
for (const pc of contextChunks) {
|
||||
if (!contextChunksByFloor.has(pc.floor)) {
|
||||
contextChunksByFloor.set(pc.floor, []);
|
||||
}
|
||||
contextChunksByFloor.get(pc.floor).push(pc);
|
||||
}
|
||||
} catch (e) {
|
||||
xbLog.warn(MODULE_ID, "获取配对chunks失败", e);
|
||||
}
|
||||
}
|
||||
|
||||
if (distantCandidates.length && total.used < total.max) {
|
||||
if (distantL0.length && total.used < total.max) {
|
||||
const distantBudget = { used: 0, max: Math.min(DISTANT_EVIDENCE_MAX, total.max - total.used) };
|
||||
|
||||
const result = assembleEvidenceByFloor(
|
||||
distantCandidates.sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0))),
|
||||
contextChunksByFloor,
|
||||
distantBudget
|
||||
);
|
||||
// 按楼层排序(时间顺序)
|
||||
distantL0.sort((a, b) => a.floor - b.floor);
|
||||
|
||||
for (const l0 of distantL0) {
|
||||
const unit = buildEvidenceUnit(l0, l1ByFloor);
|
||||
|
||||
// 原子单元预算检查
|
||||
if (distantBudget.used + unit.totalTokens > distantBudget.max) continue;
|
||||
|
||||
const unitLines = formatEvidenceUnit(unit);
|
||||
for (const line of unitLines) {
|
||||
assembled.distantEvidence.lines.push(line);
|
||||
}
|
||||
distantBudget.used += unit.totalTokens;
|
||||
usedL0Ids.add(l0.id);
|
||||
injectionStats.distantEvidence.units++;
|
||||
}
|
||||
|
||||
assembled.distantEvidence.lines = result.lines;
|
||||
assembled.distantEvidence.tokens = distantBudget.used;
|
||||
total.used += distantBudget.used;
|
||||
|
||||
injectionStats.distantEvidence.injected = result.lines.length;
|
||||
injectionStats.distantEvidence.tokens = distantBudget.used;
|
||||
injectionStats.distantEvidence.anchorCount = result.anchorCount;
|
||||
injectionStats.distantEvidence.contextPairs = result.contextPairsCount;
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════
|
||||
// [Evidence - Recent] L1 Chunks → 近期证据(未总结范围,独立预算)
|
||||
// [Evidence - Recent] 近期证据(未总结范围,独立预算)
|
||||
// ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
const recentStart = lastSummarized + 1;
|
||||
const recentEnd = lastChunkFloor - keepVisible;
|
||||
|
||||
if (evidenceChunks.length && recentEnd >= recentStart) {
|
||||
const recentCandidates = evidenceChunks
|
||||
.filter(c => !usedChunkIds.has(c.chunkId))
|
||||
.filter(c => c.floor >= recentStart && c.floor <= recentEnd);
|
||||
if (recentEnd >= recentStart) {
|
||||
const recentL0 = remainingL0
|
||||
.filter(l0 => !usedL0Ids.has(l0.id))
|
||||
.filter(l0 => l0.floor >= recentStart && l0.floor <= recentEnd);
|
||||
|
||||
const recentContextFloors = new Set();
|
||||
for (const c of recentCandidates) {
|
||||
if (c.isAnchorVirtual) continue;
|
||||
const pairFloor = getContextFloor(c);
|
||||
if (pairFloor >= 0) recentContextFloors.add(pairFloor);
|
||||
}
|
||||
|
||||
if (chatId && recentContextFloors.size > 0) {
|
||||
const newFloors = Array.from(recentContextFloors).filter(f => !contextChunksByFloor.has(f));
|
||||
if (newFloors.length > 0) {
|
||||
try {
|
||||
const newContextChunks = await getChunksByFloors(chatId, newFloors);
|
||||
for (const pc of newContextChunks) {
|
||||
if (!contextChunksByFloor.has(pc.floor)) {
|
||||
contextChunksByFloor.set(pc.floor, []);
|
||||
}
|
||||
contextChunksByFloor.get(pc.floor).push(pc);
|
||||
}
|
||||
} catch (e) {
|
||||
xbLog.warn(MODULE_ID, "获取近期配对chunks失败", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (recentCandidates.length) {
|
||||
if (recentL0.length) {
|
||||
const recentBudget = { used: 0, max: RECENT_EVIDENCE_MAX };
|
||||
|
||||
const result = assembleEvidenceByFloor(
|
||||
recentCandidates.sort((a, b) => (a.floor - b.floor) || ((a.chunkIdx ?? 0) - (b.chunkIdx ?? 0))),
|
||||
contextChunksByFloor,
|
||||
recentBudget
|
||||
);
|
||||
// 按楼层排序(时间顺序)
|
||||
recentL0.sort((a, b) => a.floor - b.floor);
|
||||
|
||||
for (const l0 of recentL0) {
|
||||
const unit = buildEvidenceUnit(l0, l1ByFloor);
|
||||
|
||||
if (recentBudget.used + unit.totalTokens > recentBudget.max) continue;
|
||||
|
||||
const unitLines = formatEvidenceUnit(unit);
|
||||
for (const line of unitLines) {
|
||||
assembled.recentEvidence.lines.push(line);
|
||||
}
|
||||
recentBudget.used += unit.totalTokens;
|
||||
usedL0Ids.add(l0.id);
|
||||
injectionStats.recentEvidence.units++;
|
||||
}
|
||||
|
||||
assembled.recentEvidence.lines = result.lines;
|
||||
assembled.recentEvidence.tokens = recentBudget.used;
|
||||
|
||||
recentEvidenceStats.injected = result.lines.length;
|
||||
recentEvidenceStats.tokens = recentBudget.used;
|
||||
recentEvidenceStats.floorRange = `${recentStart + 1}~${recentEnd + 1}楼`;
|
||||
recentEvidenceStats.contextPairs = result.contextPairsCount;
|
||||
injectionStats.recentEvidence.tokens = recentBudget.used;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -984,7 +908,7 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
|
||||
metrics.timing.evidenceAssembly = Math.round(performance.now() - T_Start - (metrics.timing.constraintFilter || 0));
|
||||
metrics.timing.formatting = 0;
|
||||
}
|
||||
return { promptText: "", injectionLogText: "", injectionStats, metrics };
|
||||
return { promptText: "", injectionStats, metrics };
|
||||
}
|
||||
|
||||
const promptText =
|
||||
@@ -1009,15 +933,16 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
|
||||
metrics.budget.utilization = Math.round(metrics.budget.total / TOTAL_BUDGET_MAX * 100);
|
||||
metrics.budget.breakdown = {
|
||||
constraints: assembled.constraints.tokens,
|
||||
events: injectionStats.event.tokens + injectionStats.evidence.tokens,
|
||||
events: injectionStats.event.tokens,
|
||||
distantEvidence: injectionStats.distantEvidence.tokens,
|
||||
recentEvidence: recentEvidenceStats.tokens || 0,
|
||||
recentEvidence: injectionStats.recentEvidence.tokens,
|
||||
arcs: assembled.arcs.tokens,
|
||||
};
|
||||
|
||||
metrics.evidence.tokens = injectionStats.distantEvidence.tokens + (recentEvidenceStats.tokens || 0);
|
||||
metrics.evidence.contextPairsAdded = injectionStats.distantEvidence.contextPairs + recentEvidenceStats.contextPairs;
|
||||
metrics.evidence.assemblyTime = Math.round(performance.now() - T_Start - (metrics.timing.constraintFilter || 0) - metrics.formatting.time);
|
||||
metrics.evidence.tokens = injectionStats.distantEvidence.tokens + injectionStats.recentEvidence.tokens;
|
||||
metrics.evidence.assemblyTime = Math.round(
|
||||
performance.now() - T_Start - (metrics.timing.constraintFilter || 0) - metrics.formatting.time
|
||||
);
|
||||
metrics.timing.evidenceAssembly = metrics.evidence.assemblyTime;
|
||||
|
||||
const totalFacts = allFacts.length;
|
||||
@@ -1026,76 +951,19 @@ async function buildVectorPrompt(store, recallResult, causalById, focusEntities
|
||||
: 100;
|
||||
metrics.quality.eventPrecisionProxy = metrics.event?.similarityDistribution?.mean || 0;
|
||||
|
||||
const totalSelected = metrics.evidence.selected || 0;
|
||||
const attached = injectionStats.evidence.attached;
|
||||
metrics.quality.evidenceDensity = totalSelected > 0
|
||||
? Math.round(attached / totalSelected * 100)
|
||||
const totalL0Selected = l0Selected.length;
|
||||
const l0WithL1 = l0Selected.filter(l0 => {
|
||||
const pair = l1ByFloor.get(l0.floor);
|
||||
return pair?.aiTop1 || pair?.userTop1;
|
||||
}).length;
|
||||
metrics.quality.l1AttachRate = totalL0Selected > 0
|
||||
? Math.round(l0WithL1 / totalL0Selected * 100)
|
||||
: 0;
|
||||
|
||||
const selectedReal = metrics.evidence.selectedByType?.chunkReal || 0;
|
||||
const selectedTotal = metrics.evidence.selected || 0;
|
||||
metrics.quality.chunkRealRatio = selectedTotal > 0
|
||||
? Math.round(selectedReal / selectedTotal * 100)
|
||||
: 0;
|
||||
|
||||
|
||||
metrics.quality.potentialIssues = detectIssues(metrics);
|
||||
}
|
||||
|
||||
return { promptText, injectionLogText: "", injectionStats, metrics };
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// 因果证据补充
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 为因果事件附加证据
|
||||
* @param {object[]} causalChain - 因果链
|
||||
* @param {Map} eventVectorMap - 事件向量索引
|
||||
* @param {Map} chunkVectorMap - chunk 向量索引
|
||||
* @param {Map} chunksMap - chunks 索引
|
||||
*/
|
||||
async function attachEvidenceToCausalEvents(causalChain, eventVectorMap, chunkVectorMap, chunksMap) {
|
||||
for (const c of causalChain) {
|
||||
c._evidenceChunk = null;
|
||||
|
||||
const ev = c.event;
|
||||
if (!ev?.id) continue;
|
||||
|
||||
const evVec = eventVectorMap.get(ev.id);
|
||||
if (!evVec?.length) continue;
|
||||
|
||||
const range = parseFloorRange(ev.summary);
|
||||
if (!range) continue;
|
||||
|
||||
const candidateChunks = [];
|
||||
for (const [chunkId, chunk] of chunksMap) {
|
||||
if (chunk.floor >= range.start && chunk.floor <= range.end) {
|
||||
const vec = chunkVectorMap.get(chunkId);
|
||||
if (vec?.length) candidateChunks.push({ chunk, vec });
|
||||
}
|
||||
}
|
||||
if (!candidateChunks.length) continue;
|
||||
|
||||
let best = null;
|
||||
let bestSim = -1;
|
||||
for (const { chunk, vec } of candidateChunks) {
|
||||
const sim = cosineSimilarity(evVec, vec);
|
||||
if (sim > bestSim) {
|
||||
bestSim = sim;
|
||||
best = chunk;
|
||||
}
|
||||
}
|
||||
|
||||
if (best && bestSim > 0.3) {
|
||||
c._evidenceChunk = {
|
||||
floor: best.floor,
|
||||
speaker: best.speaker,
|
||||
text: best.text,
|
||||
similarity: bestSim,
|
||||
};
|
||||
}
|
||||
}
|
||||
return { promptText, injectionStats, metrics };
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -1150,43 +1018,16 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) {
|
||||
recallResult = {
|
||||
...recallResult,
|
||||
events: recallResult?.events || [],
|
||||
evidenceChunks: recallResult?.evidenceChunks || [],
|
||||
l0Selected: recallResult?.l0Selected || [],
|
||||
l1ByFloor: recallResult?.l1ByFloor || new Map(),
|
||||
causalChain: recallResult?.causalChain || [],
|
||||
focusEntities: recallResult?.focusEntities || [],
|
||||
logText: recallResult?.logText || "",
|
||||
metrics: recallResult?.metrics || null,
|
||||
};
|
||||
|
||||
const causalChain = recallResult.causalChain || [];
|
||||
if (causalChain.length > 0) {
|
||||
if (chatId) {
|
||||
try {
|
||||
const floors = new Set();
|
||||
for (const c of causalChain) {
|
||||
const r = parseFloorRange(c.event?.summary);
|
||||
if (!r) continue;
|
||||
for (let f = r.start; f <= r.end; f++) floors.add(f);
|
||||
}
|
||||
|
||||
const [chunksList, chunkVecs, eventVecs] = await Promise.all([
|
||||
getChunksByFloors(chatId, Array.from(floors)),
|
||||
getAllChunkVectors(chatId),
|
||||
getAllEventVectors(chatId),
|
||||
]);
|
||||
|
||||
const chunksMap = new Map(chunksList.map(c => [c.chunkId, c]));
|
||||
const chunkVectorMap = new Map(chunkVecs.map(v => [v.chunkId, v.vector]));
|
||||
const eventVectorMap = new Map(eventVecs.map(v => [v.eventId, v.vector]));
|
||||
|
||||
await attachEvidenceToCausalEvents(causalChain, eventVectorMap, chunkVectorMap, chunksMap);
|
||||
} catch (e) {
|
||||
xbLog.warn(MODULE_ID, "Causal evidence attachment failed", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 构建因果事件索引
|
||||
causalById = new Map(
|
||||
recallResult.causalChain
|
||||
(recallResult.causalChain || [])
|
||||
.map(c => [c?.event?.id, c])
|
||||
.filter(x => x[0])
|
||||
);
|
||||
@@ -1210,7 +1051,7 @@ export async function buildVectorPromptText(excludeLastAi = false, hooks = {}) {
|
||||
|
||||
const hasUseful =
|
||||
(recallResult?.events?.length || 0) > 0 ||
|
||||
(recallResult?.evidenceChunks?.length || 0) > 0 ||
|
||||
(recallResult?.l0Selected?.length || 0) > 0 ||
|
||||
(recallResult?.causalChain?.length || 0) > 0;
|
||||
|
||||
if (!hasUseful) {
|
||||
|
||||
Reference in New Issue
Block a user