// File: LittleWhiteBox/modules/story-summary/vector/retrieval/metrics.js
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Metrics Collector (v4 - Two-Stage: L0 Locate → L1 Evidence)
//
// Naming conventions:
// - The storage layer uses L0/L1/L2/L3 (StateAtom/Chunk/Event/Fact)
// - The metrics layer uses semantic names: anchor/evidence/event/constraint/arc
//
// Architecture changes (v3 → v4):
// - The evidence section reflects the two-stage architecture:
//   L0-only fusion + per-floor L1 pull
// - Removed mergedByType / selectedByType (there is no longer a mixed pool)
// - Added l0Candidates / l0Selected / l1Pulled / l1Attached / l1CosineTime
// - The fusion section is explicitly labelled L0-only; anchorCount was removed
// - quality.chunkRealRatio → quality.l1AttachRate
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Build an empty metrics object with every counter, timer and list reset.
 *
 * Each call allocates fresh nested objects and arrays, so the result of one
 * call never shares state with the result of another.
 *
 * @returns {object} Metrics object with the sections query, anchor, lexical,
 *   fusion, constraint, event, evidence, arc, formatting, budget, timing and
 *   quality.
 */
export function createMetrics() {
  // Query Build
  const query = {
    buildTime: 0,
    refineTime: 0,
    lengths: {
      v0Chars: 0,
      v1Chars: null, // null = NA
      rerankChars: 0,
    },
  };

  // Anchor (L0 StateAtoms) - semantic anchors
  const anchor = {
    needRecall: false,
    focusEntities: [],
    matched: 0,
    floorsHit: 0,
    topHits: [],
  };

  // Lexical (MiniSearch) - lexical retrieval
  const lexical = {
    terms: [],
    atomHits: 0,
    chunkHits: 0,
    eventHits: 0,
    searchTime: 0,
  };

  // Fusion (W-RRF, L0-only) - multi-path fusion
  const fusion = {
    denseCount: 0,
    lexCount: 0,
    totalUnique: 0,
    afterCap: 0,
    time: 0,
  };

  // Constraint (L3 Facts) - world constraints
  const constraint = {
    total: 0,
    filtered: 0,
    injected: 0,
    tokens: 0,
    samples: [],
  };

  // Event (L2 Events) - event summaries
  const event = {
    inStore: 0,
    considered: 0,
    selected: 0,
    byRecallType: { direct: 0, related: 0, causal: 0, lexical: 0 },
    similarityDistribution: { min: 0, max: 0, mean: 0, median: 0 },
    entityFilter: null,
    causalChainDepth: 0,
    causalCount: 0,
    entitiesUsed: 0,
    entityNames: [],
  };

  // Evidence (Two-Stage: L0 rerank → L1 pull) - source-text evidence
  const evidence = {
    // Stage 1: L0
    l0Candidates: 0, // number of L0 candidates after W-RRF fusion
    l0Selected: 0, // number of L0 selected after rerank
    rerankApplied: false,
    beforeRerank: 0,
    afterRerank: 0,
    rerankTime: 0,
    rerankScores: null,
    // Stage 2: L1
    l1Pulled: 0, // total number of L1 chunks pulled from the DB
    l1Attached: 0, // number of L1 actually attached (top-1 × floor × 2 sides)
    l1CosineTime: 0, // time spent on L1 cosine scoring
    // Assembly
    contextPairsAdded: 0, // kept for compatibility (= USER-side count within l1Attached)
    tokens: 0,
    assemblyTime: 0,
  };

  // Arc - character arcs
  const arc = {
    injected: 0,
    tokens: 0,
  };

  // Formatting
  const formatting = {
    sectionsIncluded: [],
    time: 0,
  };

  // Budget Summary
  const budget = {
    total: 0,
    limit: 0,
    utilization: 0,
    breakdown: {
      constraints: 0,
      events: 0,
      distantEvidence: 0,
      recentEvidence: 0,
      arcs: 0,
    },
  };

  // Timing
  const timing = {
    queryBuild: 0,
    queryRefine: 0,
    anchorSearch: 0,
    lexicalSearch: 0,
    fusion: 0,
    constraintFilter: 0,
    eventRetrieval: 0,
    evidenceRetrieval: 0,
    evidenceRerank: 0,
    evidenceAssembly: 0,
    formatting: 0,
    total: 0,
  };

  // Quality Indicators
  const quality = {
    constraintCoverage: 100,
    eventPrecisionProxy: 0,
    l1AttachRate: 0, // share of L0 that have L1 attached
    potentialIssues: [],
  };

  return {
    query,
    anchor,
    lexical,
    fusion,
    constraint,
    event,
    evidence,
    arc,
    formatting,
    budget,
    timing,
    quality,
  };
}
/**
 * Compute summary statistics (min / max / mean / median) for a list of
 * similarity scores, each rounded to three decimal places.
 *
 * Entries that are not finite numbers (NaN, ±Infinity, null, undefined,
 * strings, ...) are ignored. When nothing usable remains - including for a
 * missing or empty input - all four statistics are 0.
 *
 * For an even number of values the median is the mean of the two middle
 * values; for an odd number it is the middle value.
 *
 * The input is never mutated.
 *
 * @param {number[]} similarities - Similarity scores; may be empty, null or undefined.
 * @returns {{min: number, max: number, mean: number, median: number}}
 */
export function calcSimilarityStats(similarities) {
  // Array.from accepts arrays, typed arrays and other iterables and always
  // yields a copy, so sorting below cannot disturb the caller's data.
  const sorted = Array.from(similarities ?? [])
    .filter((value) => Number.isFinite(value))
    .sort((a, b) => a - b);

  const count = sorted.length;
  if (count === 0) {
    return { min: 0, max: 0, mean: 0, median: 0 };
  }

  const round3 = (value) => Number(value.toFixed(3));
  const sum = sorted.reduce((acc, value) => acc + value, 0);

  const mid = Math.floor(count / 2);
  const median = count % 2 === 1
    ? sorted[mid]
    : (sorted[mid - 1] + sorted[mid]) / 2;

  return {
    min: round3(sorted[0]),
    max: round3(sorted[count - 1]),
    mean: round3(sum / count),
    median: round3(median),
  };
}
/**
 * Render a metrics object as a human-readable, tree-style text report.
 *
 * Sections are emitted in a fixed order: query length, query build, anchor,
 * lexical, fusion, constraint, event, evidence, arc (only when
 * `arc.injected > 0`), formatting, budget, timing and quality. Optional
 * sub-trees (entity filter, similarity distribution, rerank details,
 * evidence_rerank timing) appear only when the corresponding data is set.
 *
 * The last row of every section uses the closing `└─` connector, also when
 * the optional trailing rows (anchor details, constraint samples) are absent.
 *
 * @param {object} metrics - Metrics object with the shape returned by createMetrics().
 * @returns {string} The report; lines are joined with `\n`, and the report
 *   starts and ends with an empty line.
 */
export function formatMetricsLog(metrics) {
  const m = metrics;
  const lines = [];

  // Banner
  lines.push(
    '',
    '════════════════════════════════════════',
    ' Recall Metrics Report (v4) ',
    '════════════════════════════════════════',
    '',
  );

  // Query Length (v1Chars is printed as NA while it is null/undefined)
  const lengths = m.query?.lengths;
  lines.push(
    '[Query Length] 查询长度',
    `├─ query_v0_chars: ${lengths?.v0Chars ?? 0}`,
    `├─ query_v1_chars: ${lengths?.v1Chars == null ? 'NA' : lengths.v1Chars}`,
    `└─ rerank_query_chars: ${lengths?.rerankChars ?? 0}`,
    '',
  );

  // Query Build
  lines.push(
    '[Query] 查询构建',
    `├─ build_time: ${m.query.buildTime}ms`,
    `└─ refine_time: ${m.query.refineTime}ms`,
    '',
  );

  // Anchor (L0 StateAtoms)
  lines.push('[Anchor] L0 StateAtoms - 语义锚点');
  if (m.anchor.needRecall) {
    lines.push(
      `├─ need_recall: ${m.anchor.needRecall}`,
      `├─ focus_entities: [${(m.anchor.focusEntities || []).join(', ')}]`,
      `├─ matched: ${m.anchor.matched || 0}`,
      `└─ floors_hit: ${m.anchor.floorsHit || 0}`,
    );
  } else {
    // need_recall is the only row here, so it must close the branch.
    lines.push(`└─ need_recall: ${m.anchor.needRecall}`);
  }
  lines.push('');

  // Lexical (MiniSearch) - at most the first 8 terms are listed
  lines.push(
    '[Lexical] MiniSearch - 词法检索',
    `├─ terms: [${(m.lexical.terms || []).slice(0, 8).join(', ')}]`,
    `├─ atom_hits: ${m.lexical.atomHits}`,
    `├─ chunk_hits: ${m.lexical.chunkHits}`,
    `├─ event_hits: ${m.lexical.eventHits}`,
    `└─ search_time: ${m.lexical.searchTime}ms`,
    '',
  );

  // Fusion (W-RRF, L0-only)
  lines.push(
    '[Fusion] W-RRF (L0-only) - 多路融合',
    `├─ dense_count: ${m.fusion.denseCount}`,
    `├─ lex_count: ${m.fusion.lexCount}`,
    `├─ total_unique: ${m.fusion.totalUnique}`,
    `├─ after_cap: ${m.fusion.afterCap}`,
    `└─ time: ${m.fusion.time}ms`,
    '',
  );

  // Constraint (L3 Facts) - at most the first 2 samples are shown
  const samples = m.constraint.samples;
  const hasSamples = Boolean(samples && samples.length > 0);
  lines.push(
    '[Constraint] L3 Facts - 世界约束',
    `├─ total: ${m.constraint.total}`,
    `├─ filtered: ${m.constraint.filtered || 0}`,
    `├─ injected: ${m.constraint.injected}`,
    // tokens closes the branch when no samples row follows.
    `${hasSamples ? '├─' : '└─'} tokens: ${m.constraint.tokens}`,
  );
  if (hasSamples) {
    lines.push(`└─ samples: "${samples.slice(0, 2).join('", "')}"`);
  }
  lines.push('');

  // Event (L2 Events)
  lines.push(
    '[Event] L2 Events - 事件摘要',
    `├─ in_store: ${m.event.inStore}`,
    `├─ considered: ${m.event.considered}`,
  );
  if (m.event.entityFilter) {
    const ef = m.event.entityFilter;
    lines.push(
      `├─ entity_filter:`,
      `│ ├─ focus_entities: [${(ef.focusEntities || []).join(', ')}]`,
      `│ ├─ before: ${ef.before}`,
      `│ ├─ after: ${ef.after}`,
      `│ └─ filtered: ${ef.filtered}`,
    );
  }
  lines.push(
    `├─ selected: ${m.event.selected}`,
    `├─ by_recall_type:`,
    `│ ├─ direct: ${m.event.byRecallType.direct}`,
    `│ ├─ related: ${m.event.byRecallType.related}`,
    `│ ├─ causal: ${m.event.byRecallType.causal}`,
    `│ └─ lexical: ${m.event.byRecallType.lexical}`,
  );
  const sim = m.event.similarityDistribution;
  // An all-zero distribution means "no data", so the sub-tree is skipped.
  if (sim && sim.max > 0) {
    lines.push(
      `├─ similarity_distribution:`,
      `│ ├─ min: ${sim.min}`,
      `│ ├─ max: ${sim.max}`,
      `│ ├─ mean: ${sim.mean}`,
      `│ └─ median: ${sim.median}`,
    );
  }
  lines.push(
    `├─ causal_chain: depth=${m.event.causalChainDepth}, count=${m.event.causalCount}`,
    `└─ entities_used: ${m.event.entitiesUsed} [${(m.event.entityNames || []).join(', ')}]`,
    '',
  );

  // Evidence (Two-Stage)
  lines.push(
    '[Evidence] Two-Stage: L0 Locate → L1 Pull',
    `├─ Stage 1 (L0):`,
    `│ ├─ candidates (post-fusion): ${m.evidence.l0Candidates}`,
  );
  if (m.evidence.rerankApplied) {
    lines.push(
      `│ ├─ rerank_applied: true`,
      `│ │ ├─ before: ${m.evidence.beforeRerank}`,
      `│ │ ├─ after: ${m.evidence.afterRerank}`,
      `│ │ └─ time: ${m.evidence.rerankTime}ms`,
    );
    if (m.evidence.rerankScores) {
      const rs = m.evidence.rerankScores;
      lines.push(`│ ├─ rerank_scores: min=${rs.min}, max=${rs.max}, mean=${rs.mean}`);
    }
  } else {
    lines.push(`│ ├─ rerank_applied: false`);
  }
  lines.push(
    `│ └─ selected: ${m.evidence.l0Selected}`,
    `├─ Stage 2 (L1):`,
    `│ ├─ pulled: ${m.evidence.l1Pulled}`,
    `│ ├─ attached: ${m.evidence.l1Attached}`,
    `│ └─ cosine_time: ${m.evidence.l1CosineTime}ms`,
    `├─ tokens: ${m.evidence.tokens}`,
    `└─ assembly_time: ${m.evidence.assemblyTime}ms`,
    '',
  );

  // Arc - the whole section is omitted when nothing was injected
  if (m.arc.injected > 0) {
    lines.push(
      '[Arc] 人物弧光',
      `├─ injected: ${m.arc.injected}`,
      `└─ tokens: ${m.arc.tokens}`,
      '',
    );
  }

  // Formatting
  lines.push(
    '[Formatting] 格式化',
    `├─ sections: [${(m.formatting.sectionsIncluded || []).join(', ')}]`,
    `└─ time: ${m.formatting.time}ms`,
    '',
  );

  // Budget Summary
  const bd = m.budget.breakdown || {};
  lines.push(
    '[Budget] 预算',
    `├─ total_tokens: ${m.budget.total}`,
    `├─ limit: ${m.budget.limit}`,
    `├─ utilization: ${m.budget.utilization}%`,
    `└─ breakdown:`,
    ` ├─ constraints: ${bd.constraints || 0}`,
    ` ├─ events: ${bd.events || 0}`,
    ` ├─ distant_evidence: ${bd.distantEvidence || 0}`,
    ` ├─ recent_evidence: ${bd.recentEvidence || 0}`,
    ` └─ arcs: ${bd.arcs || 0}`,
    '',
  );

  // Timing - query/lexical/fusion/l1_cosine values are read from their own
  // sections rather than from m.timing.
  lines.push(
    '[Timing] 计时',
    `├─ query_build: ${m.query.buildTime}ms`,
    `├─ query_refine: ${m.query.refineTime}ms`,
    `├─ anchor_search: ${m.timing.anchorSearch}ms`,
    `├─ lexical_search: ${m.lexical.searchTime}ms`,
    `├─ fusion: ${m.fusion.time}ms`,
    `├─ constraint_filter: ${m.timing.constraintFilter}ms`,
    `├─ event_retrieval: ${m.timing.eventRetrieval}ms`,
    `├─ evidence_retrieval: ${m.timing.evidenceRetrieval}ms`,
  );
  if (m.timing.evidenceRerank > 0) {
    lines.push(`├─ evidence_rerank: ${m.timing.evidenceRerank}ms`);
  }
  lines.push(
    `├─ l1_cosine: ${m.evidence.l1CosineTime}ms`,
    `├─ evidence_assembly: ${m.timing.evidenceAssembly}ms`,
    `├─ formatting: ${m.timing.formatting}ms`,
    `└─ total: ${m.timing.total}ms`,
    '',
  );

  // Quality Indicators
  lines.push(
    '[Quality] 质量指标',
    `├─ constraint_coverage: ${m.quality.constraintCoverage}%`,
    `├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`,
    `├─ l1_attach_rate: ${m.quality.l1AttachRate}%`,
  );
  const issues = m.quality.potentialIssues;
  if (issues && issues.length > 0) {
    lines.push(`└─ potential_issues:`);
    issues.forEach((issue, i) => {
      const connector = i === issues.length - 1 ? ' └─' : ' ├─';
      // Separate connector and text with a space, like every other row.
      lines.push(`${connector} ${issue}`);
    });
  } else {
    lines.push(`└─ potential_issues: none`);
  }

  lines.push(
    '',
    '════════════════════════════════════════',
    '',
  );

  return lines.join('\n');
}
/**
 * Inspect a metrics object and list human-readable warnings about the
 * recall run.
 *
 * Checks run in a fixed order (query build, anchors, lexical search, fusion,
 * events, L0 rerank, L1 attachment, budget, performance) and the returned
 * messages keep that order. The metrics object is only read, never modified.
 *
 * `event.byRecallType` is treated as optional throughout: missing counters
 * count as 0.
 *
 * @param {object} metrics - Metrics object with the shape returned by createMetrics().
 * @returns {string[]} Warning messages; empty when nothing looks suspicious.
 */
export function detectIssues(metrics) {
  const issues = [];
  const m = metrics;

  // ── Query build ────────────────────────────────────────────────────
  if ((m.anchor.focusEntities || []).length === 0) {
    issues.push('No focus entities extracted - entity lexicon may be empty or messages too short');
  }

  // ── Anchor matching ────────────────────────────────────────────────
  if ((m.anchor.matched || 0) === 0 && m.anchor.needRecall) {
    issues.push('No anchors matched - may need to generate anchors');
  }

  // ── Lexical search ─────────────────────────────────────────────────
  // Only flagged when there were terms to search for.
  if ((m.lexical.terms || []).length > 0 && m.lexical.atomHits === 0 && m.lexical.chunkHits === 0 && m.lexical.eventHits === 0) {
    issues.push('Lexical search returned zero hits - terms may not match any indexed content');
  }

  // ── Fusion (L0-only) ───────────────────────────────────────────────
  if (m.fusion.lexCount === 0 && m.fusion.denseCount > 0) {
    issues.push('No lexical L0 candidates in fusion - hybrid retrieval not contributing');
  }
  if (m.fusion.afterCap === 0) {
    issues.push('Fusion produced zero L0 candidates - all retrieval paths may have failed');
  }

  // ── Event recall ───────────────────────────────────────────────────
  const directCount = m.event.byRecallType?.direct || 0;
  const relatedCount = m.event.byRecallType?.related || 0;
  if (m.event.considered > 0) {
    // Share of considered events selected through the dense paths.
    const denseSelectRatio = (directCount + relatedCount) / m.event.considered;
    if (denseSelectRatio < 0.1) {
      issues.push(`Dense event selection ratio too low (${(denseSelectRatio * 100).toFixed(1)}%) - threshold may be too high`);
    }
    if (denseSelectRatio > 0.6 && m.event.considered > 10) {
      issues.push(`Dense event selection ratio high (${(denseSelectRatio * 100).toFixed(1)}%) - may include noise`);
    }
  }

  // Entity filter
  if (m.event.entityFilter) {
    const ef = m.event.entityFilter;
    if (ef.filtered === 0 && ef.before > 10) {
      issues.push('No events filtered by entity - focus entities may be too broad or missing');
    }
    if (ef.before > 0 && ef.filtered > ef.before * 0.8) {
      issues.push(`Too many events filtered (${ef.filtered}/${ef.before}) - focus may be too narrow`);
    }
  }

  // Similarity (a min of 0 is skipped: it is the value of an unset distribution)
  if (m.event.similarityDistribution && m.event.similarityDistribution.min > 0 && m.event.similarityDistribution.min < 0.5) {
    issues.push(`Low similarity events included (min=${m.event.similarityDistribution.min})`);
  }

  // Causal chain - uses the guarded counter so a metrics object without
  // byRecallType yields a warning instead of a TypeError.
  if (m.event.selected > 0 && m.event.causalCount === 0 && directCount === 0) {
    issues.push('No direct or causal events - query may not align with stored events');
  }

  // ── L0 rerank ──────────────────────────────────────────────────────
  if (m.evidence.rerankApplied) {
    if (m.evidence.beforeRerank > 0 && m.evidence.afterRerank > 0) {
      const filterRatio = 1 - (m.evidence.afterRerank / m.evidence.beforeRerank);
      if (filterRatio > 0.7) {
        issues.push(`High L0 rerank filter ratio (${(filterRatio * 100).toFixed(0)}%) - many irrelevant L0 in fusion output`);
      }
    }
    if (m.evidence.rerankScores) {
      const rs = m.evidence.rerankScores;
      if (rs.max < 0.5) {
        issues.push(`Low L0 rerank scores (max=${rs.max}) - query may be poorly matched`);
      }
      if (rs.mean < 0.3) {
        issues.push(`Very low average L0 rerank score (mean=${rs.mean}) - context may be weak`);
      }
    }
    if (m.evidence.rerankTime > 2000) {
      issues.push(`Slow L0 rerank (${m.evidence.rerankTime}ms) - may affect response time`);
    }
  }

  // ── L1 attachment ──────────────────────────────────────────────────
  if (m.evidence.l0Selected > 0 && m.evidence.l1Pulled === 0) {
    issues.push('Zero L1 chunks pulled - L1 vectors may not exist or DB read failed');
  }
  if (m.evidence.l0Selected > 0 && m.evidence.l1Attached === 0 && m.evidence.l1Pulled > 0) {
    issues.push('L1 chunks pulled but none attached - cosine scores may be too low or floor mismatch');
  }
  const l1AttachRate = m.quality.l1AttachRate || 0;
  if (m.evidence.l0Selected > 5 && l1AttachRate < 20) {
    issues.push(`Low L1 attach rate (${l1AttachRate}%) - many L0 lack concrete dialogue evidence`);
  }

  // ── Budget ─────────────────────────────────────────────────────────
  if (m.budget.utilization > 90) {
    issues.push(`High budget utilization (${m.budget.utilization}%) - may be truncating content`);
  }

  // ── Performance ────────────────────────────────────────────────────
  if (m.timing.total > 8000) {
    issues.push(`Slow recall (${m.timing.total}ms) - consider optimization`);
  }
  if (m.query.buildTime > 100) {
    issues.push(`Slow query build (${m.query.buildTime}ms) - entity lexicon may be too large`);
  }
  if (m.evidence.l1CosineTime > 1000) {
    issues.push(`Slow L1 cosine scoring (${m.evidence.l1CosineTime}ms) - too many chunks pulled`);
  }

  return issues;
}