feat: variables 2.0 state + L0 summary integration
This commit is contained in:
@@ -12,6 +12,12 @@ import { xbLog } from '../../../core/debug-core.js';
|
||||
import { getContext } from '../../../../../../extensions.js';
|
||||
import { getSummaryStore } from '../data/store.js';
|
||||
import { filterText } from './text-filter.js';
|
||||
import {
|
||||
searchStateAtoms,
|
||||
buildL0FloorBonus,
|
||||
stateToVirtualChunks,
|
||||
mergeAndSparsify,
|
||||
} from './state-recall.js';
|
||||
|
||||
const MODULE_ID = 'recall';
|
||||
|
||||
@@ -35,12 +41,16 @@ const CONFIG = {
|
||||
MIN_SIMILARITY_EVENT: 0.65,
|
||||
MMR_LAMBDA: 0.72,
|
||||
|
||||
BONUS_PARTICIPANT_HIT: 0.08,
|
||||
BONUS_TEXT_HIT: 0.05,
|
||||
BONUS_WORLD_TOPIC_HIT: 0.06,
|
||||
|
||||
FLOOR_LIMIT: 1,
|
||||
};
|
||||
BONUS_PARTICIPANT_HIT: 0.08,
|
||||
BONUS_TEXT_HIT: 0.05,
|
||||
BONUS_WORLD_TOPIC_HIT: 0.06,
|
||||
|
||||
// L0 配置
|
||||
L0_FLOOR_BONUS_FACTOR: 0.10,
|
||||
FLOOR_MAX_CHUNKS: 2,
|
||||
|
||||
FLOOR_LIMIT: 1,
|
||||
};
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 工具函数
|
||||
@@ -136,10 +146,20 @@ function sortCausalEvents(causalArray) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Canonicalize a value so it can be compared as plain text.
 *
 * Steps, in order:
 *   1. Falsy input (null, undefined, '', 0, false, NaN) becomes the empty string;
 *      anything else is coerced with String().
 *   2. Unicode NFKC normalization folds compatibility forms (e.g. full-width
 *      letters) into their canonical equivalents.
 *   3. Invisible characters U+200B..U+200D and U+FEFF are removed.
 *   4. Leading and trailing whitespace is trimmed.
 *
 * This function used to be declared twice in a row with identical bodies; a single
 * declaration is kept because duplicate top-level function declarations are not
 * allowed in ES modules.
 *
 * @param {*} s - Value to normalize.
 * @returns {string} The normalized string (possibly empty).
 */
function normalize(s) {
    return String(s || '').normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim();
}
|
||||
|
||||
// Parse the floor range embedded in a summary: "(#321-322)" or "(#321)".
/**
 * Extract the first "(#N)" or "(#N-M)" marker from a summary and return it as a
 * floor index range.
 *
 * Each number found in the marker is reduced by one and clamped at 0, so "#1"
 * (and "#0") map to index 0 — presumably the marker is a 1-based display number;
 * confirm against whatever writes the summaries.
 *
 * The bounds are always returned in ascending order. A reversed marker such as
 * "(#322-321)" previously produced start > end, which made any
 * `for (f = start; f <= end; f++)` consumer skip the range entirely.
 *
 * @param {*} summary - Summary text; non-strings are coerced with String().
 * @returns {{start: number, end: number} | null} Inclusive index range, or null
 *   when the summary is falsy or contains no marker.
 */
function parseFloorRange(summary) {
    if (!summary) return null;

    const match = String(summary).match(/\(#(\d+)(?:-(\d+))?\)/);
    if (!match) return null;

    const toIndex = (digits) => Math.max(0, Number.parseInt(digits, 10) - 1);

    const first = toIndex(match[1]);
    // A single-floor marker "(#N)" has no second group: the range is one floor wide.
    const last = match[2] === undefined ? first : toIndex(match[2]);

    return { start: Math.min(first, last), end: Math.max(first, last) };
}
|
||||
|
||||
function cleanForRecall(text) {
|
||||
// 1. 应用用户自定义过滤规则
|
||||
// 2. 移除 TTS 标记(硬编码)
|
||||
@@ -308,7 +328,7 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
|
||||
// L1 Chunks 检索
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
async function searchChunks(queryVector, vectorConfig) {
|
||||
async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map()) {
|
||||
const { chatId } = getContext();
|
||||
if (!chatId || !queryVector?.length) return [];
|
||||
|
||||
@@ -321,12 +341,18 @@ async function searchChunks(queryVector, vectorConfig) {
|
||||
|
||||
const scored = chunkVectors.map(cv => {
|
||||
const match = String(cv.chunkId).match(/c-(\d+)-(\d+)/);
|
||||
const floor = match ? parseInt(match[1], 10) : 0;
|
||||
const baseSim = cosineSimilarity(queryVector, cv.vector);
|
||||
const l0Bonus = l0FloorBonus.get(floor) || 0;
|
||||
|
||||
return {
|
||||
_id: cv.chunkId,
|
||||
chunkId: cv.chunkId,
|
||||
floor: match ? parseInt(match[1], 10) : 0,
|
||||
chunkIdx: match ? parseInt(match[2], 10) : 0,
|
||||
similarity: cosineSimilarity(queryVector, cv.vector),
|
||||
floor,
|
||||
chunkIdx: match ? parseInt(match[2], 10) : 0,
|
||||
similarity: baseSim + l0Bonus,
|
||||
_baseSimilarity: baseSim,
|
||||
_l0Bonus: l0Bonus,
|
||||
vector: cv.vector,
|
||||
};
|
||||
});
|
||||
@@ -403,30 +429,19 @@ async function searchChunks(queryVector, vectorConfig) {
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// L2 Events 检索
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities) {
|
||||
const { chatId, name1 } = getContext();
|
||||
if (!chatId || !queryVector?.length) {
|
||||
console.warn('[searchEvents] 早期返回: chatId或queryVector为空');
|
||||
return [];
|
||||
}
|
||||
|
||||
const meta = await getMeta(chatId);
|
||||
const fp = getEngineFingerprint(vectorConfig);
|
||||
console.log('[searchEvents] fingerprint检查:', {
|
||||
metaFp: meta.fingerprint,
|
||||
currentFp: fp,
|
||||
match: meta.fingerprint === fp || !meta.fingerprint,
|
||||
});
|
||||
if (meta.fingerprint && meta.fingerprint !== fp) return [];
|
||||
|
||||
const eventVectors = await getAllEventVectors(chatId);
|
||||
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
|
||||
console.log('[searchEvents] 向量数据:', {
|
||||
eventVectorsCount: eventVectors.length,
|
||||
vectorMapSize: vectorMap.size,
|
||||
allEventsCount: allEvents?.length,
|
||||
});
|
||||
|
||||
async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus = new Map()) {
|
||||
const { chatId, name1 } = getContext();
|
||||
if (!chatId || !queryVector?.length) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const meta = await getMeta(chatId);
|
||||
const fp = getEngineFingerprint(vectorConfig);
|
||||
if (meta.fingerprint && meta.fingerprint !== fp) return [];
|
||||
|
||||
const eventVectors = await getAllEventVectors(chatId);
|
||||
const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
|
||||
if (!vectorMap.size) return [];
|
||||
|
||||
const userName = normalize(name1);
|
||||
@@ -458,11 +473,23 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
|
||||
bonus += CONFIG.BONUS_TEXT_HIT;
|
||||
reasons.push('text');
|
||||
}
|
||||
|
||||
// world topic 命中
|
||||
if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) {
|
||||
bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
|
||||
reasons.push('world');
|
||||
|
||||
// world topic 命中
|
||||
if (worldTopics.some(topic => querySet.has(topic) && text.includes(topic))) {
|
||||
bonus += CONFIG.BONUS_WORLD_TOPIC_HIT;
|
||||
reasons.push('world');
|
||||
}
|
||||
|
||||
// L0 加权:事件覆盖楼层范围命中
|
||||
const range = parseFloorRange(event.summary);
|
||||
if (range) {
|
||||
for (let f = range.start; f <= range.end; f++) {
|
||||
if (l0FloorBonus.has(f)) {
|
||||
bonus += l0FloorBonus.get(f);
|
||||
reasons.push('L0');
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -477,15 +504,6 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
|
||||
vector: v,
|
||||
};
|
||||
});
|
||||
|
||||
// 相似度分布日志
|
||||
const simValues = scored.map(s => s.similarity).sort((a, b) => b - a);
|
||||
console.log('[searchEvents] 相似度分布(前20):', simValues.slice(0, 20));
|
||||
console.log('[searchEvents] 相似度分布(后20):', simValues.slice(-20));
|
||||
console.log('[searchEvents] 有向量的事件数:', scored.filter(s => s.similarity > 0).length);
|
||||
console.log('[searchEvents] sim >= 0.6:', scored.filter(s => s.similarity >= 0.6).length);
|
||||
console.log('[searchEvents] sim >= 0.5:', scored.filter(s => s.similarity >= 0.5).length);
|
||||
console.log('[searchEvents] sim >= 0.3:', scored.filter(s => s.similarity >= 0.3).length);
|
||||
|
||||
// ★ 记录过滤前的分布(用 finalScore,与显示一致)
|
||||
const preFilterDistribution = {
|
||||
@@ -503,7 +521,6 @@ async function searchEvents(queryVector, allEvents, vectorConfig, store, queryEn
|
||||
const candidates = scored
|
||||
.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
|
||||
.sort((a, b) => b.finalScore - a.finalScore)
|
||||
.slice(0, CONFIG.CANDIDATE_EVENTS);
|
||||
.slice(0, CONFIG.CANDIDATE_EVENTS);
|
||||
|
||||
// 动态 K:质量不够就少拿
|
||||
@@ -575,7 +592,7 @@ function formatCausalTree(causalEvents, recalledEvents) {
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 日志:主报告
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
function formatRecallLog({ elapsed, segments, weights, chunkResults, eventResults, allEvents, queryEntities, causalEvents = [], chunkPreFilterStats = null, l0Results = [] }) {
|
||||
const lines = [
|
||||
'╔══════════════════════════════════════════════════════════════╗',
|
||||
@@ -604,13 +621,36 @@ function formatRecallLog({ elapsed, segments, weights, chunkResults, eventResult
|
||||
lines.push(` ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
|
||||
});
|
||||
|
||||
lines.push('');
|
||||
lines.push('┌─────────────────────────────────────────────────────────────┐');
|
||||
lines.push('│ 【提取实体】用于判断"亲身经历"(DIRECT) │');
|
||||
lines.push('└─────────────────────────────────────────────────────────────┘');
|
||||
lines.push(` ${queryEntities?.length ? queryEntities.join('、') : '(无)'}`);
|
||||
|
||||
lines.push('');
|
||||
lines.push('');
|
||||
lines.push('┌─────────────────────────────────────────────────────────────┐');
|
||||
lines.push('│ 【提取实体】用于判断"亲身经历"(DIRECT) │');
|
||||
lines.push('└─────────────────────────────────────────────────────────────┘');
|
||||
lines.push(` ${queryEntities?.length ? queryEntities.join('、') : '(无)'}`);
|
||||
|
||||
lines.push('');
|
||||
lines.push('┌─────────────────────────────────────────────────────────────┐');
|
||||
lines.push('│ 【L0 语义锚点】状态变更加权信号 │');
|
||||
lines.push('└─────────────────────────────────────────────────────────────┘');
|
||||
|
||||
if (l0Results.length) {
|
||||
const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
|
||||
lines.push(` 召回: ${l0Results.length} 条`);
|
||||
lines.push(` 影响楼层: ${l0Floors.join(', ')}(L1/L2 候选在这些楼层获得 +${CONFIG.L0_FLOOR_BONUS_FACTOR} 加分)`);
|
||||
lines.push('');
|
||||
|
||||
l0Results.slice(0, 10).forEach((r, i) => {
|
||||
lines.push(` ${String(i + 1).padStart(2)}. #${r.floor} ${r.atom.semantic.slice(0, 50)}${r.atom.semantic.length > 50 ? '...' : ''}`);
|
||||
lines.push(` 相似度: ${r.similarity.toFixed(3)}`);
|
||||
});
|
||||
|
||||
if (l0Results.length > 10) {
|
||||
lines.push(` ... 还有 ${l0Results.length - 10} 条`);
|
||||
}
|
||||
} else {
|
||||
lines.push(' 召回: 0 条(无 L0 数据或未启用)');
|
||||
}
|
||||
|
||||
lines.push('');
|
||||
lines.push('┌─────────────────────────────────────────────────────────────┐');
|
||||
lines.push('│ 【L1 原文片段】 │');
|
||||
lines.push('└─────────────────────────────────────────────────────────────┘');
|
||||
@@ -706,16 +746,36 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
||||
if (!queryVector?.length) {
|
||||
return { events: [], chunks: [], elapsed: Math.round(performance.now() - T0), logText: 'Empty query vector.' };
|
||||
}
|
||||
|
||||
const lexicon = buildEntityLexicon(store, allEvents);
|
||||
const queryEntities = extractEntities([queryText, ...segments].join('\n'), lexicon);
|
||||
|
||||
const lexicon = buildEntityLexicon(store, allEvents);
|
||||
const queryEntities = extractEntities([queryText, ...segments].join('\n'), lexicon);
|
||||
|
||||
// ════════════════════════════════════════════════════════════════════════
|
||||
// L0 召回
|
||||
// ════════════════════════════════════════════════════════════════════════
|
||||
let l0Results = [];
|
||||
let l0FloorBonus = new Map();
|
||||
let l0VirtualChunks = [];
|
||||
|
||||
try {
|
||||
l0Results = await searchStateAtoms(queryVector, vectorConfig);
|
||||
l0FloorBonus = buildL0FloorBonus(l0Results, CONFIG.L0_FLOOR_BONUS_FACTOR);
|
||||
l0VirtualChunks = stateToVirtualChunks(l0Results);
|
||||
} catch (e) {
|
||||
xbLog.warn(MODULE_ID, 'L0 召回失败,降级处理', e);
|
||||
}
|
||||
|
||||
const [chunkResults, eventResults] = await Promise.all([
|
||||
searchChunks(queryVector, vectorConfig),
|
||||
const [chunkResults, eventResults] = await Promise.all([
|
||||
searchChunks(queryVector, vectorConfig, l0FloorBonus),
|
||||
searchEvents(queryVector, allEvents, vectorConfig, store, queryEntities, l0FloorBonus),
|
||||
]);
|
||||
|
||||
const chunkPreFilterStats = chunkResults._preFilterStats || null;
|
||||
|
||||
// ════════════════════════════════════════════════════════════════════════
|
||||
// 合并 L0 虚拟 chunks 到 L1
|
||||
// ════════════════════════════════════════════════════════════════════════
|
||||
const mergedChunks = mergeAndSparsify(l0VirtualChunks, chunkResults, CONFIG.FLOOR_MAX_CHUNKS);
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// 因果链追溯:从 eventResults 出发找祖先事件
|
||||
@@ -742,25 +802,26 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
|
||||
sortCausalEvents(causalEvents);
|
||||
const causalEventsTruncated = causalEvents.slice(0, CONFIG.CAUSAL_INJECT_MAX);
|
||||
|
||||
const elapsed = Math.round(performance.now() - T0);
|
||||
const logText = formatRecallLog({
|
||||
elapsed,
|
||||
queryText,
|
||||
segments,
|
||||
weights,
|
||||
const elapsed = Math.round(performance.now() - T0);
|
||||
const logText = formatRecallLog({
|
||||
elapsed,
|
||||
queryText,
|
||||
segments,
|
||||
weights,
|
||||
chunkResults: mergedChunks,
|
||||
eventResults,
|
||||
allEvents,
|
||||
queryEntities,
|
||||
causalEvents: causalEventsTruncated,
|
||||
chunkPreFilterStats,
|
||||
l0Results,
|
||||
});
|
||||
|
||||
console.group('%c[Recall]', 'color: #7c3aed; font-weight: bold');
|
||||
console.log(`Elapsed: ${elapsed}ms | Entities: ${queryEntities.join(', ') || '(none)'}`);
|
||||
console.log(`L1: ${chunkResults.length} | L2: ${eventResults.length}/${allEvents.length} | Causal: ${causalEventsTruncated.length}`);
|
||||
console.groupEnd();
|
||||
|
||||
});
|
||||
|
||||
console.group('%c[Recall]', 'color: #7c3aed; font-weight: bold');
|
||||
console.log(`Elapsed: ${elapsed}ms | L0: ${l0Results.length} | Entities: ${queryEntities.join(', ') || '(none)'}`);
|
||||
console.log(`L1: ${mergedChunks.length} | L2: ${eventResults.length}/${allEvents.length} | Causal: ${causalEventsTruncated.length}`);
|
||||
console.groupEnd();
|
||||
|
||||
return { events: eventResults, causalEvents: causalEventsTruncated, chunks: mergedChunks, elapsed, logText, queryEntities, l0Results };
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user