Add vector IO and text filtering

This commit is contained in:
2026-01-29 17:02:51 +08:00
parent fc23781e17
commit ee5f02fff9
10 changed files with 3368 additions and 42 deletions

View File

@@ -163,7 +163,6 @@ function formatCausalEventLine(causalItem, causalById) {
return lines.join("\n");
}
// ─────────────────────────────────────────────────────────────────────────────
// 装配日志(开发调试用)
// ─────────────────────────────────────────────────────────────────────────────
@@ -251,6 +250,19 @@ function formatInjectionLog(stats, details, recentOrphanStats = null) {
return lines.join("\n");
}
/**
 * Rewrite the ordinal prefix at the start of a formatted event text.
 *
 * The prefix is matched only at the very beginning of the string: optional
 * leading whitespace, one or more digits, a dot, optional whitespace and an
 * optional "【" (e.g. "12. " or "12.【"). Only the digits are replaced; the
 * surrounding whitespace and punctuation are kept as-is. Text without such
 * a prefix is returned unchanged.
 *
 * @param {*} text - Event text; falsy values are treated as an empty string.
 * @param {number|string} newIndex - Ordinal written in place of the old one.
 * @returns {string} The text with its leading ordinal replaced.
 */
function renumberEventText(text, newIndex) {
    const source = String(text || "");
    // A replacer callback is used instead of a "$1…$2" template so newIndex is
    // always inserted literally: digits placed right after "$1" can never be
    // read as a two-digit group reference, and "$" sequences inside newIndex
    // are not expanded as replacement patterns.
    return source.replace(
        /^(\s*)\d+(\.\s*(?:【)?)/,
        (_match, leading, separator) => `${leading}${newIndex}${separator}`,
    );
}
/**
 * Compute the numeric key used to order events along the timeline.
 *
 * Preference order:
 *   1. `start` of the range returned by `parseFloorRange(ev.summary)`, when
 *      that call yields a truthy result (sorting by the floor where the
 *      event appears is treated as the most reliable source);
 *   2. the number N from an "evt-N" substring of `ev.id`;
 *   3. `Number.MAX_SAFE_INTEGER`, so events without either sort last.
 *
 * @param {object} [ev] - Event object; `summary` and `id` are read if present.
 * @returns {number} Sort key for ascending timeline order.
 */
function getEventSortKey(ev) {
    const floorRange = parseFloorRange(ev?.summary);
    if (floorRange) {
        return floorRange.start;
    }

    // No usable floor range: fall back to the sequence number embedded in the id.
    const idText = String(ev?.id || "");
    const idMatch = /evt-(\d+)/.exec(idText);
    if (!idMatch) {
        return Number.MAX_SAFE_INTEGER;
    }
    return parseInt(idMatch[1], 10);
}
// ─────────────────────────────────────────────────────────────────────────────
// 非向量模式:全量总结注入(世界 + 事件 + 弧光)
@@ -451,29 +463,29 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
// 候选按相似度从高到低(保证高分优先拥有证据)
const candidates = [...recalledEvents].sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
let idxDirect = 1;
let idxSimilar = 1;
const selectedDirectTexts = [];
const selectedSimilarTexts = [];
const selectedDirect = []; // { event, text, tokens, chunk, hasEvidence }
const selectedSimilar = []; // { event, text, tokens, chunk, hasEvidence }
for (const e of candidates) {
if (total.used >= total.max) break;
const isDirect = e._recallType === "DIRECT";
const idx = isDirect ? idxDirect : idxSimilar;
const bestChunk = pickBestChunkForEvent(e.event);
// 先尝试“带证据”
let text = formatEventWithEvidence(e, idx, bestChunk);
// idx 先占位写 0,后面统一按时间线重排后再改号
let text = formatEventWithEvidence(e, 0, bestChunk);
let cost = estimateTokens(text);
let hasEvidence = !!bestChunk;
let chosenChunk = bestChunk || null;
// 塞不下就退化成“不带证据”
if (total.used + cost > total.max) {
text = formatEventWithEvidence(e, idx, null);
text = formatEventWithEvidence(e, 0, null);
cost = estimateTokens(text);
hasEvidence = false;
chosenChunk = null;
if (total.used + cost > total.max) {
continue;
@@ -482,11 +494,9 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
// 写入
if (isDirect) {
selectedDirectTexts.push(text);
idxDirect++;
selectedDirect.push({ event: e.event, text, tokens: cost, chunk: chosenChunk, hasEvidence });
} else {
selectedSimilarTexts.push(text);
idxSimilar++;
selectedSimilar.push({ event: e.event, text, tokens: cost, chunk: chosenChunk, hasEvidence });
}
injectionStats.events.selected++;
@@ -515,8 +525,19 @@ async function buildVectorPrompt(store, recallResult, causalById, queryEntities
});
}
details.directCount = selectedDirectTexts.length;
details.similarCount = selectedSimilarTexts.length;
// ═══════════════════════════════════════════════════════════════════
// 重排:恢复时间线顺序(按楼层/evt 序号升序)
// 并统一重编号(不重新 pick chunk,不重新格式化结构)
// ═══════════════════════════════════════════════════════════════════
selectedDirect.sort((a, b) => getEventSortKey(a.event) - getEventSortKey(b.event));
selectedSimilar.sort((a, b) => getEventSortKey(a.event) - getEventSortKey(b.event));
const selectedDirectTexts = selectedDirect.map((it, i) => renumberEventText(it.text, i + 1));
const selectedSimilarTexts = selectedSimilar.map((it, i) => renumberEventText(it.text, i + 1));
details.directCount = selectedDirect.length;
details.similarCount = selectedSimilar.length;
assembled.events.direct = selectedDirectTexts;
assembled.events.similar = selectedSimilarTexts;