feat: variables 2.0 state + L0 summary integration

This commit is contained in:
2026-01-31 23:06:03 +08:00
parent 201c74dc71
commit 4b0541610b
22 changed files with 1949 additions and 2314 deletions

View File

@@ -12,6 +12,12 @@ import { xbLog } from '../../../core/debug-core.js';
import { getContext } from '../../../../../../extensions.js';
import { getSummaryStore } from '../data/store.js';
import { filterText } from './text-filter.js';
import {
searchStateAtoms,
buildL0FloorBonus,
stateToVirtualChunks,
mergeAndSparsify,
} from './state-recall.js';
const MODULE_ID = 'recall';
@@ -35,12 +41,16 @@ const CONFIG = {
MIN_SIMILARITY_EVENT: 0.65,
MMR_LAMBDA: 0.72,
BONUS_PARTICIPANT_HIT: 0.08,
BONUS_TEXT_HIT: 0.05,
BONUS_WORLD_TOPIC_HIT: 0.06,
FLOOR_LIMIT: 1,
};
BONUS_PARTICIPANT_HIT: 0.08,
BONUS_TEXT_HIT: 0.05,
BONUS_WORLD_TOPIC_HIT: 0.06,
// L0 配置
L0_FLOOR_BONUS_FACTOR: 0.10,
FLOOR_MAX_CHUNKS: 2,
FLOOR_LIMIT: 1,
};
// ═══════════════════════════════════════════════════════════════════════════
// 工具函数
@@ -136,10 +146,20 @@ function sortCausalEvents(causalArray) {
});
}
/**
 * Canonicalise a value into a comparable string.
 *
 * Falsy inputs become the empty string. Everything else is stringified,
 * folded to Unicode NFKC form, stripped of zero-width characters
 * (U+200B..U+200D and U+FEFF) and trimmed of surrounding whitespace.
 *
 * @param {*} s - Value to normalise.
 * @returns {string} The normalised text.
 */
function normalize(s) {
    const raw = String(s || '');
    const folded = raw.normalize('NFKC');
    const withoutZeroWidth = folded.replace(/[\u200B-\u200D\uFEFF]/g, '');
    return withoutZeroWidth.trim();
}
/**
 * Canonicalise a value into a comparable string.
 *
 * Falsy inputs become the empty string. Everything else is stringified,
 * folded to Unicode NFKC form, stripped of zero-width characters
 * (U+200B..U+200D and U+FEFF) and trimmed of surrounding whitespace.
 *
 * @param {*} s - Value to normalise.
 * @returns {string} The normalised text.
 */
function normalize(s) {
    const raw = String(s || '');
    const folded = raw.normalize('NFKC');
    const withoutZeroWidth = folded.replace(/[\u200B-\u200D\uFEFF]/g, '');
    return withoutZeroWidth.trim();
}
/**
 * Parse the floor range referenced by a summary string.
 *
 * Looks for the first "(#N)" or "(#N-M)" marker, e.g. "(#321-322)" or "(#321)".
 * Each number has 1 subtracted and is clamped at 0 (presumably converting
 * 1-based floor labels to 0-based indices; confirm against the summary writer).
 *
 * @param {*} summary - Summary text; coerced with String(). Falsy values yield null.
 * @returns {{start: number, end: number} | null} Inclusive range with
 *   start <= end, or null when the summary is falsy or holds no marker.
 */
function parseFloorRange(summary) {
    if (!summary) return null;

    const match = String(summary).match(/\(#(\d+)(?:-(\d+))?\)/);
    if (!match) return null;

    // Shift a captured digit string down by one, never going below 0.
    const toIndex = (digits) => Math.max(0, Number.parseInt(digits, 10) - 1);

    const first = toIndex(match[1]);
    // A single-number marker "(#N)" describes a one-floor range.
    const last = match[2] === undefined ? first : toIndex(match[2]);

    // A reversed marker such as "(#322-321)" would otherwise produce start > end,
    // which makes a `for (f = start; f <= end; f++)` scan silently skip every floor.
    return { start: Math.min(first, last), end: Math.max(first, last) };
}
function cleanForRecall(text) {
// 1. 应用用户自定义过滤规则
// 2. 移除 TTS 标记(硬编码)
@@ -308,7 +328,7 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
// L1 Chunks 检索
// ═══════════════════════════════════════════════════════════════════════════
async function searchChunks(queryVector, vectorConfig) {
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map()) {
const { chatId } = getContext();
if (!chatId || !queryVector?.length) return [];
@@ -321,12 +341,18 @@ async function searchChunks(queryVector, vectorConfig) {
const scored = chunkVectors.map(cv => {
const match = String(cv.chunkId).match(/c-(\d+)-(\d+)/);
const floor = match ? parseInt(match[1], 10) : 0;
const baseSim = cosineSimilarity(queryVector, cv.vector);
const l0Bonus = l0FloorBonus.get(floor) || 0;
return {
_id: cv.chunkId,
chunkId: cv.chunkId,
floor: match ? parseInt(match[1], 10) : 0,
chunkIdx: match ? parseInt(match[2], 10) : 0,
similarity: cosineSimilarity(queryVector, cv.vector),
floor,
chunkIdx: match ? parseInt(match[2], 10) : 0,
similarity: baseSim + l0Bonus,
_baseSimilarity: baseSim,
_l0Bonus: l0Bonus,
vector: cv.vector,
};
});
@@ -403,30 +429,19 @@ async function searchChunks(queryVector, vectorConfig) {
// ═══════════════════════════════════════════════════════════════════════════
// L2 Events 检索
// ═══════════════════════════════════════════════════════════════════════════
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities) {
const { chatId, name1 } = getContext();
if (!chatId || !queryVector?.length) {
console.warn('[searchEvents] 早期返回: chatId或queryVector为空');
return [];
}
const meta = await getMeta(chatId);
const fp = getEngineFingerprint(vectorConfig);
console.log('[searchEvents] fingerprint检查:', {
metaFp: meta.fingerprint,
currentFp: fp,
match: meta.fingerprint === fp || !meta.fingerprint,
});
if (meta.fingerprint && meta.fingerprint !== fp) return [];
const eventVectors = await getAllEventVectors(chatId);
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
console.log('[searchEvents] 向量数据:', {
eventVectorsCount: eventVectors.length,
vectorMapSize: vectorMap.size,
allEventsCount: allEvents?.length,
});
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus = new Map()) {
const { chatId, name1 } = getContext();
if (!chatId || !queryVector?.length) {
return [];
}
const meta = await getMeta(chatId);
const fp = getEngineFingerprint(vectorConfig);
if (meta.fingerprint && meta.fingerprint !== fp) return [];
const eventVectors = await getAllEventVectors(chatId);
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
if (!vectorMap.size) return [];
const userName = normalize(name1);
@@ -458,11 +473,23 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
bonus += CONFIG.BONUS_TEXT_HIT;
reasons.push('text');
}
// world topic 命中
if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) {
bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
reasons.push('world');
// world topic 命中
if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) {
bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
reasons.push('world');
}
// L0 加权:事件覆盖楼层范围命中
const range = parseFloorRange(event.summary);
if (range) {
for (let f = range.start; f <= range.end; f++) {
if (l0FloorBonus.has(f)) {
bonus += l0FloorBonus.get(f);
reasons.push('L0');
break;
}
}
}
return {
@@ -477,15 +504,6 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
vector: v,
};
});
// 相似度分布日志
const simValues = scored.map(s => s.similarity).sort((a, b) => b - a);
console.log('[searchEvents] 相似度分布前20:', simValues.slice(0, 20));
console.log('[searchEvents] 相似度分布后20:', simValues.slice(-20));
console.log('[searchEvents] 有向量的事件数:', scored.filter(s => s.similarity > 0).length);
console.log('[searchEvents] sim >= 0.6:', scored.filter(s => s.similarity >= 0.6).length);
console.log('[searchEvents] sim >= 0.5:', scored.filter(s => s.similarity >= 0.5).length);
console.log('[searchEvents] sim >= 0.3:', scored.filter(s => s.similarity >= 0.3).length);
// ★ 记录过滤前的分布(用 finalScore与显示一致
const preFilterDistribution = {
@@ -503,7 +521,6 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
const candidates = scored
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
.sort((a, b) => b.finalScore - a.finalScore)
.slice(0, CONFIG.CANDIDATE_EVENTS);
.slice(0, CONFIG.CANDIDATE_EVENTS);
// 动态 K质量不够就少拿
@@ -575,7 +592,7 @@ function formatCausalTree(causalEvents, recalledEvents) {
// ═══════════════════════════════════════════════════════════════════════════
// 日志:主报告
// ═══════════════════════════════════════════════════════════════════════════
function formatRecallLog({ elapsed, segments, weights, chunkResults, eventResults, allEvents, queryEntities, causalEvents = [], chunkPreFilterStats = null, l0Results = [] }) {
const lines = [
'╔══════════════════════════════════════════════════════════════╗',
@@ -604,13 +621,36 @@ function formatRecallLog({ elapsed, segments, weights, chunkResults, eventResult
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
});
lines.push('');
lines.push('┌─────────────────────────────────────────────────────────────┐');
lines.push('│ 【提取实体】用于判断"亲身经历"(DIRECT) │');
lines.push('└─────────────────────────────────────────────────────────────┘');
lines.push(` ${queryEntities?.length ? queryEntities.join('、') : '(无)'}`);
lines.push('');
lines.push('');
lines.push('┌─────────────────────────────────────────────────────────────┐');
lines.push('│ 【提取实体】用于判断"亲身经历"(DIRECT) │');
lines.push('└─────────────────────────────────────────────────────────────┘');
lines.push(` ${queryEntities?.length ? queryEntities.join('、') : '(无)'}`);
lines.push('');
lines.push('┌─────────────────────────────────────────────────────────────┐');
lines.push('│ 【L0 语义锚点】状态变更加权信号 │');
lines.push('└─────────────────────────────────────────────────────────────┘');
if (l0Results.length) {
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
lines.push(` 召回: ${l0Results.length}`);
lines.push(` 影响楼层: ${l0Floors.join(', ')}L1/L2 候选在这些楼层获得 +${CONFIG.L0_FLOOR_BONUS_FACTOR} 加分)`);
lines.push('');
l0Results.slice(0, 10).forEach((r, i) => {
lines.push(` ${String(i + 1).padStart(2)}. #${r.floor} ${r.atom.semantic.slice(0, 50)}${r.atom.semantic.length > 50 ? '...' : ''}`);
lines.push(` 相似度: ${r.similarity.toFixed(3)}`);
});
if (l0Results.length > 10) {
lines.push(` ... 还有 ${l0Results.length - 10}`);
}
} else {
lines.push(' 召回: 0 条(无 L0 数据或未启用)');
}
lines.push('');
lines.push('┌─────────────────────────────────────────────────────────────┐');
lines.push('│ 【L1 原文片段】 │');
lines.push('└─────────────────────────────────────────────────────────────┘');
@@ -706,16 +746,36 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
if (!queryVector?.length) {
return { events: [], chunks: [], elapsed: Math.round(performance.now() - T0), logText: 'Empty query vector.' };
}
const lexicon = buildEntityLexicon(store, allEvents);
const queryEntities = extractEntities([queryText, ...segments].join('\n'), lexicon);
const lexicon = buildEntityLexicon(store, allEvents);
const queryEntities = extractEntities([queryText, ...segments].join('\n'), lexicon);
// ════════════════════════════════════════════════════════════════════════
// L0 召回
// ════════════════════════════════════════════════════════════════════════
let l0Results = [];
let l0FloorBonus = new Map();
let l0VirtualChunks = [];
try {
l0Results = await searchStateAtoms(queryVector, vectorConfig);
l0FloorBonus = buildL0FloorBonus(l0Results, CONFIG.L0_FLOOR_BONUS_FACTOR);
l0VirtualChunks = stateToVirtualChunks(l0Results);
} catch (e) {
xbLog.warn(MODULE_ID, 'L0 召回失败,降级处理', e);
}
const [chunkResults, eventResults] = await Promise.all([
searchChunks(queryVector, vectorConfig),
const [chunkResults, eventResults] = await Promise.all([
searchChunks(queryVector, vectorConfig, l0FloorBonus),
searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus),
]);
const chunkPreFilterStats = chunkResults._preFilterStats || null;
// ════════════════════════════════════════════════════════════════════════
// 合并 L0 虚拟 chunks 到 L1
// ════════════════════════════════════════════════════════════════════════
const mergedChunks = mergeAndSparsify(l0VirtualChunks, chunkResults, CONFIG.FLOOR_MAX_CHUNKS);
// ─────────────────────────────────────────────────────────────────────
// 因果链追溯:从 eventResults 出发找祖先事件
@@ -742,25 +802,26 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
sortCausalEvents(causalEvents);
const causalEventsTruncated = causalEvents.slice(0, CONFIG.CAUSAL_INJECT_MAX);
const elapsed = Math.round(performance.now() - T0);
const logText = formatRecallLog({
elapsed,
queryText,
segments,
weights,
const elapsed = Math.round(performance.now() - T0);
const logText = formatRecallLog({
elapsed,
queryText,
segments,
weights,
chunkResults: mergedChunks,
eventResults,
allEvents,
queryEntities,
causalEvents: causalEventsTruncated,
chunkPreFilterStats,
l0Results,
});
console.group('%c[Recall]', 'color: #7c3aed; font-weight: bold');
console.log(`Elapsed: ${elapsed}ms | Entities: ${queryEntities.join(', ') || '(none)'}`);
console.log(`L1: ${chunkResults.length} | L2: ${eventResults.length}/${allEvents.length} | Causal: ${causalEventsTruncated.length}`);
console.groupEnd();
});
console.group('%c[Recall]', 'color: #7c3aed; font-weight: bold');
console.log(`Elapsed: ${elapsed}ms | L0: ${l0Results.length} | Entities: ${queryEntities.join(', ') || '(none)'}`);
console.log(`L1: ${mergedChunks.length} | L2: ${eventResults.length}/${allEvents.length} | Causal: ${causalEventsTruncated.length}`);
console.groupEnd();
return { events: eventResults, causalEvents: causalEventsTruncated, chunks: mergedChunks, elapsed, logText, queryEntities, l0Results };
}