Files
LittleWhiteBox/modules/story-summary/vector/retrieval/metrics.js

389 lines
14 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Metrics Collector
// 召回质量指标收集与格式化
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Build a fresh, zero-initialised metrics object for one recall run.
 *
 * Every call returns new nested objects and arrays, so callers can mutate
 * the result without affecting any other metrics instance.
 *
 * @returns {object} Metrics record with the sections `l0`, `l1`, `l2`, `l3`,
 *   `l4`, `budget`, `timing` and `quality`.
 */
export function createMetrics() {
    // Small factories so repeated shapes never share a reference.
    const emptyStats = () => ({ min: 0, max: 0, mean: 0, median: 0 });
    const emptyChunkCounts = () => ({ l0Virtual: 0, l1Real: 0 });

    // L0 Query Understanding
    const l0 = {
        needRecall: false,
        intent: '',
        focusEntities: [],
        queries: [],
        implicitTopics: [],
        queryExpansionTime: 0,
        atomsMatched: 0,
        floorsHit: 0,
        topAtoms: [],
    };

    // L1 Constraints (Facts)
    const l1 = {
        factsTotal: 0,
        factsInjected: 0,
        factsFiltered: 0,
        tokens: 0,
        samples: [],
    };

    // L2 Narrative Retrieval
    const l2 = {
        eventsInStore: 0,
        eventsConsidered: 0,
        eventsSelected: 0,
        byRecallType: { direct: 0, causal: 0, context: 0 },
        similarityDistribution: emptyStats(),
        entityFilterStats: null,
        causalChainDepth: 0,
        causalEventsCount: 0,
        entitiesLoaded: 0,
        entityNames: [],
        retrievalTime: 0,
    };

    // L3 Evidence Assembly
    const l3 = {
        floorsFromL0: 0,
        // Candidate pool size (before rerank)
        chunksInRange: 0,
        chunksInRangeByType: emptyChunkCounts(),
        // Final injected set (after rerank + sparse)
        chunksSelected: 0,
        chunksSelectedByType: emptyChunkCounts(),
        // Context pairing
        contextPairsAdded: 0,
        tokens: 0,
        assemblyTime: 0,
        // Rerank-related fields
        rerankApplied: false,
        beforeRerank: 0,
        afterRerank: 0,
        rerankTime: 0,
        rerankScoreDistribution: null,
    };

    // L4 Formatting
    const l4 = {
        sectionsIncluded: [],
        formattingTime: 0,
    };

    // Budget Summary
    const budget = {
        total: 0,
        limit: 0,
        utilization: 0,
        breakdown: {
            constraints: 0,
            events: 0,
            entities: 0,
            chunks: 0,
            recentOrphans: 0,
            arcs: 0,
        },
    };

    // Total Timing
    const timing = {
        queryExpansion: 0,
        l0Search: 0,
        l1Constraints: 0,
        l2Retrieval: 0,
        l3Retrieval: 0,
        l3Rerank: 0,
        l3Assembly: 0,
        l4Formatting: 0,
        total: 0,
    };

    // Quality Indicators
    const quality = {
        constraintCoverage: 100,
        eventPrecisionProxy: 0,
        evidenceDensity: 0,
        potentialIssues: [],
    };

    return { l0, l1, l2, l3, l4, budget, timing, quality };
}
/**
 * Compute summary statistics for a list of similarity scores.
 *
 * The input array is copied before sorting, so the caller's array is left
 * untouched. All results are rounded to three decimal places.
 *
 * @param {number[]|null|undefined} similarities - Similarity scores.
 * @returns {{min: number, max: number, mean: number, median: number}}
 *   The statistics; all zeros when the input is missing or empty.
 */
export function calcSimilarityStats(similarities) {
    if (!similarities?.length) {
        return { min: 0, max: 0, mean: 0, median: 0 };
    }

    const round3 = (value) => Number(value.toFixed(3));
    const sorted = [...similarities].sort((a, b) => a - b);
    const count = sorted.length;
    const total = sorted.reduce((acc, value) => acc + value, 0);

    // A true median averages the two central values when the count is even;
    // taking only sorted[count / 2] would bias the result upwards.
    const upperMid = Math.floor(count / 2);
    const median = count % 2 === 0
        ? (sorted[upperMid - 1] + sorted[upperMid]) / 2
        : sorted[upperMid];

    return {
        min: round3(sorted[0]),
        max: round3(sorted[count - 1]),
        mean: round3(total / count),
        median: round3(median),
    };
}
/**
 * Render one report section as a two-level tree.
 *
 * Connectors are chosen by position, so the final row of a section (and of
 * each child list) always gets `└─`, whichever optional rows are present.
 *
 * @param {string} title - Section heading line.
 * @param {Array<string|{text: string, children?: string[]}>} entries -
 *   Top-level rows; an object entry may carry child rows.
 * @returns {string[]} The heading followed by the rendered rows.
 */
function renderMetricsSection(title, entries) {
    const rendered = [title];
    entries.forEach((entry, index) => {
        const isLast = index === entries.length - 1;
        const row = typeof entry === 'string' ? { text: entry } : entry;
        rendered.push(`${isLast ? '└─' : '├─'} ${row.text}`);

        // Children of the last row have no vertical rule to continue.
        const gutter = isLast ? '  ' : '│ ';
        const children = row.children || [];
        children.forEach((child, childIndex) => {
            const connector = childIndex === children.length - 1 ? '└─' : '├─';
            rendered.push(`${gutter}${connector} ${child}`);
        });
    });
    return rendered;
}

/**
 * Format a metrics object as a human-readable, multi-line log report.
 *
 * @param {object} metrics - Metrics record with the sections `l0`, `l1`,
 *   `l2`, `l3`, `l4`, `budget`, `timing` and `quality`. A missing section
 *   raises a TypeError.
 * @returns {string} The report, with lines joined by `\n`.
 */
export function formatMetricsLog(metrics) {
    const { l0, l1, l2, l3, l4, budget, timing, quality } = metrics;
    const banner = '════════════════════════════════════════';
    const joinList = (items) => (items || []).join(', ');

    // L0 Query Understanding: details are shown only when recall was needed.
    const l0Rows = [`need_recall: ${l0.needRecall}`];
    if (l0.needRecall) {
        l0Rows.push(
            `intent: ${l0.intent || 'mixed'}`,
            `focus_entities: [${joinList(l0.focusEntities)}]`,
            `queries: [${joinList((l0.queries || []).slice(0, 3))}]`,
            `query_expansion_time: ${l0.queryExpansionTime}ms`,
            `atoms_matched: ${l0.atomsMatched || 0}`,
            `floors_hit: ${l0.floorsHit || 0}`,
        );
    }

    // L1 Constraints
    const l1Rows = [
        `facts_total: ${l1.factsTotal}`,
        `facts_filtered: ${l1.factsFiltered || 0}`,
        `facts_injected: ${l1.factsInjected}`,
        `tokens: ${l1.tokens}`,
    ];
    if (l1.samples && l1.samples.length > 0) {
        l1Rows.push(`samples: "${l1.samples.slice(0, 2).join('", "')}"`);
    }

    // L2 Narrative Retrieval
    const l2Rows = [
        `events_in_store: ${l2.eventsInStore}`,
        `events_considered: ${l2.eventsConsidered}`,
    ];
    const entityFilter = l2.entityFilterStats;
    if (entityFilter) {
        l2Rows.push({
            text: 'entity_filter:',
            children: [
                `focus_entities: [${joinList(entityFilter.focusEntities)}]`,
                `before_filter: ${entityFilter.before}`,
                `after_filter: ${entityFilter.after}`,
                `filtered_out: ${entityFilter.filtered}`,
            ],
        });
    }
    l2Rows.push(`events_selected: ${l2.eventsSelected}`, {
        text: 'by_recall_type:',
        children: [
            `direct: ${l2.byRecallType.direct}`,
            `causal: ${l2.byRecallType.causal}`,
            `context: ${l2.byRecallType.context}`,
        ],
    });
    const similarity = l2.similarityDistribution;
    // An all-zero distribution means nothing was measured, so it is skipped.
    if (similarity && similarity.max > 0) {
        l2Rows.push({
            text: 'similarity_distribution:',
            children: [
                `min: ${similarity.min}`,
                `max: ${similarity.max}`,
                `mean: ${similarity.mean}`,
                `median: ${similarity.median}`,
            ],
        });
    }
    l2Rows.push(
        `causal_chain: depth=${l2.causalChainDepth}, events=${l2.causalEventsCount}`,
        `entities_loaded: ${l2.entitiesLoaded} [${joinList(l2.entityNames)}]`,
        `retrieval_time: ${l2.retrievalTime}ms`,
    );

    // L3 Evidence Assembly
    const chunkTypeRows = (counts) => (counts
        ? [`l0_virtual: ${counts.l0Virtual || 0}`, `l1_real: ${counts.l1Real || 0}`]
        : []);
    const l3Rows = [
        `floors_from_l0: ${l3.floorsFromL0}`,
        // Candidate pool size
        { text: `chunks_in_range: ${l3.chunksInRange}`, children: chunkTypeRows(l3.chunksInRangeByType) },
    ];
    // Rerank details
    if (l3.rerankApplied) {
        l3Rows.push({
            text: 'rerank_applied: true',
            children: [
                `before: ${l3.beforeRerank}`,
                `after: ${l3.afterRerank}`,
                `time: ${l3.rerankTime}ms`,
            ],
        });
        const rerankScores = l3.rerankScoreDistribution;
        if (rerankScores) {
            l3Rows.push(`rerank_scores: min=${rerankScores.min}, max=${rerankScores.max}, mean=${rerankScores.mean}`);
        }
    } else {
        l3Rows.push('rerank_applied: false');
    }
    l3Rows.push(
        // Final injected size
        { text: `chunks_selected: ${l3.chunksSelected}`, children: chunkTypeRows(l3.chunksSelectedByType) },
        `context_pairs_added: ${l3.contextPairsAdded}`,
        `tokens: ${l3.tokens}`,
        `assembly_time: ${l3.assemblyTime}ms`,
    );

    // L4 Formatting
    const l4Rows = [
        `sections: [${joinList(l4.sectionsIncluded)}]`,
        `formatting_time: ${l4.formattingTime}ms`,
    ];

    // Budget Summary; `entities` is listed so every tracked bucket is visible.
    const breakdown = budget.breakdown || {};
    const budgetRows = [
        `total_tokens: ${budget.total}`,
        `budget_limit: ${budget.limit}`,
        `utilization: ${budget.utilization}%`,
        {
            text: 'breakdown:',
            children: [
                `constraints (L1): ${breakdown.constraints || 0}`,
                `events (L2): ${breakdown.events || 0}`,
                `entities: ${breakdown.entities || 0}`,
                `chunks (L3): ${breakdown.chunks || 0}`,
                `recent_orphans: ${breakdown.recentOrphans || 0}`,
                `arcs: ${breakdown.arcs || 0}`,
            ],
        },
    ];

    // Timing; the rerank row appears only when rerank took measurable time.
    const timingRows = [
        `query_expansion: ${timing.queryExpansion}ms`,
        `l0_search: ${timing.l0Search}ms`,
        `l1_constraints: ${timing.l1Constraints}ms`,
        `l2_retrieval: ${timing.l2Retrieval}ms`,
        `l3_retrieval: ${timing.l3Retrieval}ms`,
    ];
    if (timing.l3Rerank > 0) {
        timingRows.push(`l3_rerank: ${timing.l3Rerank}ms`);
    }
    timingRows.push(
        `l3_assembly: ${timing.l3Assembly}ms`,
        `l4_formatting: ${timing.l4Formatting}ms`,
        `total: ${timing.total}ms`,
    );

    // Quality Indicators
    const issues = quality.potentialIssues;
    const hasIssues = Boolean(issues && issues.length > 0);
    const qualityRows = [
        `constraint_coverage: ${quality.constraintCoverage}%`,
        `event_precision_proxy: ${quality.eventPrecisionProxy}`,
        `evidence_density: ${quality.evidenceDensity}%`,
        hasIssues
            ? { text: 'potential_issues:', children: [...issues] }
            : 'potential_issues: none',
    ];

    const sections = [
        ['[L0] Query Understanding', l0Rows],
        ['[L1] Constraints (Facts)', l1Rows],
        ['[L2] Narrative Retrieval', l2Rows],
        ['[L3] Evidence Assembly', l3Rows],
        ['[L4] Prompt Formatting', l4Rows],
        ['[Budget Summary]', budgetRows],
        ['[Timing]', timingRows],
        ['[Quality Indicators]', qualityRows],
    ];

    const lines = ['', banner, ' Recall Metrics Report ', banner, ''];
    for (const [title, rows] of sections) {
        // Each section is followed by one blank separator line.
        lines.push(...renderMetricsSection(title, rows), '');
    }
    lines.push(banner, '');
    return lines.join('\n');
}
/**
 * Inspect a metrics object and describe anything that looks suspicious.
 *
 * The checks run in a fixed order (selection ratio, entity filter,
 * similarity, causal chain, L0 atoms, rerank, evidence density, budget,
 * performance) and each one that fires appends a message.
 *
 * @param {object} metrics - Metrics record; the `l0`, `l2`, `l3`, `budget`
 *   and `timing` sections are read.
 * @returns {string[]} Issue messages in check order; empty when none fire.
 */
export function detectIssues(metrics) {
    const { l0, l2, l3, budget, timing } = metrics;
    const found = [];
    const asPercent = (ratio, digits) => (ratio * 100).toFixed(digits);

    // Selection ratio
    const considered = l2.eventsConsidered;
    if (considered > 0) {
        const selectedShare = l2.eventsSelected / considered;
        if (selectedShare < 0.1) {
            found.push(`Event selection ratio too low (${asPercent(selectedShare, 1)}%) - threshold may be too high`);
        }
        if (selectedShare > 0.6 && considered > 10) {
            found.push(`Event selection ratio high (${asPercent(selectedShare, 1)}%) - may include noise`);
        }
    }

    // Entity filter
    const filterStats = l2.entityFilterStats;
    if (filterStats) {
        const { before, filtered } = filterStats;
        if (filtered === 0 && before > 10) {
            found.push(`No events filtered by entity - focus entities may be too broad or missing`);
        }
        if (before > 0 && filtered > before * 0.8) {
            found.push(`Too many events filtered (${filtered}/${before}) - focus may be too narrow`);
        }
    }

    // Similarity: a minimum of zero is treated as "not measured".
    const lowestSimilarity = l2.similarityDistribution?.min;
    if (lowestSimilarity > 0 && lowestSimilarity < 0.5) {
        found.push(`Low similarity events included (min=${lowestSimilarity})`);
    }

    // Causal chain
    if (l2.eventsSelected > 0 && l2.causalEventsCount === 0 && l2.byRecallType.direct === 0) {
        found.push('No direct or causal events - query expansion may be inaccurate');
    }

    // L0 atoms
    if (!l0.atomsMatched) {
        found.push('L0 atoms not matched - may need to generate anchors');
    }

    // Rerank
    if (l3.rerankApplied) {
        const { beforeRerank, afterRerank, rerankScoreDistribution: scores, rerankTime } = l3;
        if (beforeRerank > 0 && afterRerank > 0) {
            const removedShare = 1 - (afterRerank / beforeRerank);
            if (removedShare > 0.7) {
                found.push(`High rerank filter ratio (${asPercent(removedShare, 0)}%) - many irrelevant chunks removed`);
            }
        }
        if (scores) {
            if (scores.max < 0.5) {
                found.push(`Low rerank scores (max=${scores.max}) - query may be poorly matched`);
            }
            if (scores.mean < 0.3) {
                found.push(`Very low average rerank score (mean=${scores.mean}) - context may be weak`);
            }
        }
        if (rerankTime > 2000) {
            found.push(`Slow rerank (${rerankTime}ms) - may affect response time`);
        }
    }

    // Evidence density (based on the composition of the selected chunks);
    // only reported once more than ten chunks were selected.
    const selectedCount = l3.chunksSelected;
    const selectedByType = l3.chunksSelectedByType;
    if (selectedCount > 10 && selectedByType) {
        const realShare = (selectedByType.l1Real || 0) / selectedCount;
        if (realShare < 0.3) {
            found.push(`Low L1 chunk ratio in selected (${asPercent(realShare, 0)}%) - may lack concrete evidence`);
        }
    }

    // Budget
    if (budget.utilization > 90) {
        found.push(`High budget utilization (${budget.utilization}%) - may be truncating content`);
    }

    // Performance
    if (timing.total > 5000) {
        found.push(`Slow recall (${timing.total}ms) - consider optimization`);
    }

    return found;
}