chore: update story summary and lint fixes

This commit is contained in:
2026-02-08 12:22:45 +08:00
parent 56e30bfe02
commit d3d818da6a
15 changed files with 2479 additions and 852 deletions

View File

@@ -0,0 +1,388 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Metrics Collector
// 召回质量指标收集与格式化
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Build a blank metrics record for one recall run.
 *
 * Every call returns a brand-new object graph (no arrays or nested objects
 * are shared between calls), so callers can fill it in freely.
 *
 * @returns {object} Metrics with sections `l0`–`l4`, `budget`, `timing` and
 *   `quality`; counters start at 0, lists start empty, optional stats start
 *   as `null`, and `quality.constraintCoverage` starts at 100.
 */
export function createMetrics() {
    // Per-type chunk counter used for both the candidate and the selected set.
    const emptyChunkSplit = () => ({ l0Virtual: 0, l1Real: 0 });

    // L0 Query Understanding
    const l0 = {
        needRecall: false,
        intent: '',
        focusEntities: [],
        queries: [],
        implicitTopics: [],
        queryExpansionTime: 0,
        atomsMatched: 0,
        floorsHit: 0,
        topAtoms: [],
    };

    // L1 Constraints (Facts)
    const l1 = {
        factsTotal: 0,
        factsInjected: 0,
        factsFiltered: 0,
        tokens: 0,
        samples: [],
    };

    // L2 Narrative Retrieval
    const l2 = {
        eventsInStore: 0,
        eventsConsidered: 0,
        eventsSelected: 0,
        byRecallType: { direct: 0, causal: 0, context: 0 },
        similarityDistribution: { min: 0, max: 0, mean: 0, median: 0 },
        entityFilterStats: null,
        causalChainDepth: 0,
        causalEventsCount: 0,
        entitiesLoaded: 0,
        entityNames: [],
        retrievalTime: 0,
    };

    // L3 Evidence Assembly
    const l3 = {
        floorsFromL0: 0,
        // Candidate pool size (before rerank)
        chunksInRange: 0,
        chunksInRangeByType: emptyChunkSplit(),
        // Final injected set (after rerank + sparse)
        chunksSelected: 0,
        chunksSelectedByType: emptyChunkSplit(),
        // Context pairing
        contextPairsAdded: 0,
        tokens: 0,
        assemblyTime: 0,
        // Rerank details
        rerankApplied: false,
        beforeRerank: 0,
        afterRerank: 0,
        rerankTime: 0,
        rerankScoreDistribution: null,
    };

    // L4 Formatting
    const l4 = {
        sectionsIncluded: [],
        formattingTime: 0,
    };

    // Budget Summary
    const budget = {
        total: 0,
        limit: 0,
        utilization: 0,
        breakdown: {
            constraints: 0,
            events: 0,
            entities: 0,
            chunks: 0,
            recentOrphans: 0,
            arcs: 0,
        },
    };

    // Total Timing
    const timing = {
        queryExpansion: 0,
        l0Search: 0,
        l1Constraints: 0,
        l2Retrieval: 0,
        l3Retrieval: 0,
        l3Rerank: 0,
        l3Assembly: 0,
        l4Formatting: 0,
        total: 0,
    };

    // Quality Indicators
    const quality = {
        constraintCoverage: 100,
        eventPrecisionProxy: 0,
        evidenceDensity: 0,
        potentialIssues: [],
    };

    return { l0, l1, l2, l3, l4, budget, timing, quality };
}
/**
 * Summarise a list of similarity scores.
 *
 * Entries that are not finite numbers (undefined, null, NaN, ±Infinity,
 * strings) are ignored so a single bad score cannot throw or turn every
 * statistic into NaN.
 *
 * @param {Iterable<number>|null|undefined} similarities - Similarity scores.
 * @returns {{min: number, max: number, mean: number, median: number}}
 *   Statistics rounded to 3 decimals; all zeros when there is no usable score.
 */
export function calcSimilarityStats(similarities) {
    if (!similarities?.length) {
        return { min: 0, max: 0, mean: 0, median: 0 };
    }

    // Copy before sorting so the caller's array is never reordered.
    const sorted = [...similarities]
        .filter((value) => Number.isFinite(value))
        .sort((a, b) => a - b);
    if (sorted.length === 0) {
        return { min: 0, max: 0, mean: 0, median: 0 };
    }

    const round3 = (value) => Number(value.toFixed(3));
    const sum = sorted.reduce((acc, value) => acc + value, 0);

    // An even-sized sample has no single middle element: average the two
    // central values instead of reporting the upper one.
    const mid = Math.floor(sorted.length / 2);
    const median = sorted.length % 2 === 0
        ? (sorted[mid - 1] + sorted[mid]) / 2
        : sorted[mid];

    return {
        min: round3(sorted[0]),
        max: round3(sorted[sorted.length - 1]),
        mean: round3(sum / sorted.length),
        median: round3(median),
    };
}
/**
 * Render a metrics object as a multi-line, human-readable report.
 *
 * The report contains one tree-style section per pipeline stage (L0–L4),
 * followed by budget, timing and quality sections, framed by rule lines.
 *
 * @param {object} metrics - Metrics object with the shape produced by
 *   createMetrics(); every top-level section is read.
 * @returns {string} The report, with lines joined by '\n' (it starts and ends
 *   with an empty line).
 */
export function formatMetricsLog(metrics) {
    const m = metrics;
    const lines = [];
    const rule = '═══════════════════════════════════════════════════════════════════';

    lines.push('');
    lines.push(rule);
    lines.push(' Recall Metrics Report ');
    lines.push(rule);
    lines.push('');

    // L0 Query Understanding
    lines.push('[L0] Query Understanding');
    // When recall was skipped, need_recall is the only row: close the tree there.
    lines.push(`${m.l0.needRecall ? '├─' : '└─'} need_recall: ${m.l0.needRecall}`);
    if (m.l0.needRecall) {
        lines.push(`├─ intent: ${m.l0.intent || 'mixed'}`);
        lines.push(`├─ focus_entities: [${(m.l0.focusEntities || []).join(', ')}]`);
        // Only the first three queries are shown to keep the row short.
        lines.push(`├─ queries: [${(m.l0.queries || []).slice(0, 3).join(', ')}]`);
        lines.push(`├─ query_expansion_time: ${m.l0.queryExpansionTime}ms`);
        lines.push(`├─ atoms_matched: ${m.l0.atomsMatched || 0}`);
        lines.push(`└─ floors_hit: ${m.l0.floorsHit || 0}`);
    }
    lines.push('');

    // L1 Constraints
    const hasSamples = Boolean(m.l1.samples && m.l1.samples.length > 0);
    lines.push('[L1] Constraints (Facts)');
    lines.push(`├─ facts_total: ${m.l1.factsTotal}`);
    lines.push(`├─ facts_filtered: ${m.l1.factsFiltered || 0}`);
    lines.push(`├─ facts_injected: ${m.l1.factsInjected}`);
    // tokens is the last row unless a samples row follows it.
    lines.push(`${hasSamples ? '├─' : '└─'} tokens: ${m.l1.tokens}`);
    if (hasSamples) {
        lines.push(`└─ samples: "${m.l1.samples.slice(0, 2).join('", "')}"`);
    }
    lines.push('');

    // L2 Narrative Retrieval
    lines.push('[L2] Narrative Retrieval');
    lines.push(`├─ events_in_store: ${m.l2.eventsInStore}`);
    lines.push(`├─ events_considered: ${m.l2.eventsConsidered}`);
    if (m.l2.entityFilterStats) {
        const ef = m.l2.entityFilterStats;
        lines.push(`├─ entity_filter:`);
        lines.push(`│ ├─ focus_entities: [${(ef.focusEntities || []).join(', ')}]`);
        lines.push(`│ ├─ before_filter: ${ef.before}`);
        lines.push(`│ ├─ after_filter: ${ef.after}`);
        lines.push(`│ └─ filtered_out: ${ef.filtered}`);
    }
    lines.push(`├─ events_selected: ${m.l2.eventsSelected}`);
    lines.push(`├─ by_recall_type:`);
    lines.push(`│ ├─ direct: ${m.l2.byRecallType.direct}`);
    lines.push(`│ ├─ causal: ${m.l2.byRecallType.causal}`);
    lines.push(`│ └─ context: ${m.l2.byRecallType.context}`);
    const sim = m.l2.similarityDistribution;
    // A max of 0 is the untouched default, so the distribution is omitted.
    if (sim && sim.max > 0) {
        lines.push(`├─ similarity_distribution:`);
        lines.push(`│ ├─ min: ${sim.min}`);
        lines.push(`│ ├─ max: ${sim.max}`);
        lines.push(`│ ├─ mean: ${sim.mean}`);
        lines.push(`│ └─ median: ${sim.median}`);
    }
    lines.push(`├─ causal_chain: depth=${m.l2.causalChainDepth}, events=${m.l2.causalEventsCount}`);
    lines.push(`├─ entities_loaded: ${m.l2.entitiesLoaded} [${(m.l2.entityNames || []).join(', ')}]`);
    lines.push(`└─ retrieval_time: ${m.l2.retrievalTime}ms`);
    lines.push('');

    // L3 Evidence Assembly
    lines.push('[L3] Evidence Assembly');
    lines.push(`├─ floors_from_l0: ${m.l3.floorsFromL0}`);
    // Candidate pool size
    lines.push(`├─ chunks_in_range: ${m.l3.chunksInRange}`);
    if (m.l3.chunksInRangeByType) {
        const cir = m.l3.chunksInRangeByType;
        lines.push(`│ ├─ l0_virtual: ${cir.l0Virtual || 0}`);
        lines.push(`│ └─ l1_real: ${cir.l1Real || 0}`);
    }
    // Rerank details
    if (m.l3.rerankApplied) {
        lines.push(`├─ rerank_applied: true`);
        lines.push(`│ ├─ before: ${m.l3.beforeRerank}`);
        lines.push(`│ ├─ after: ${m.l3.afterRerank}`);
        lines.push(`│ └─ time: ${m.l3.rerankTime}ms`);
        if (m.l3.rerankScoreDistribution) {
            const rd = m.l3.rerankScoreDistribution;
            lines.push(`├─ rerank_scores: min=${rd.min}, max=${rd.max}, mean=${rd.mean}`);
        }
    } else {
        lines.push(`├─ rerank_applied: false`);
    }
    // Final injected size
    lines.push(`├─ chunks_selected: ${m.l3.chunksSelected}`);
    if (m.l3.chunksSelectedByType) {
        const cs = m.l3.chunksSelectedByType;
        lines.push(`│ ├─ l0_virtual: ${cs.l0Virtual || 0}`);
        lines.push(`│ └─ l1_real: ${cs.l1Real || 0}`);
    }
    lines.push(`├─ context_pairs_added: ${m.l3.contextPairsAdded}`);
    lines.push(`├─ tokens: ${m.l3.tokens}`);
    lines.push(`└─ assembly_time: ${m.l3.assemblyTime}ms`);
    lines.push('');

    // L4 Formatting
    lines.push('[L4] Prompt Formatting');
    lines.push(`├─ sections: [${(m.l4.sectionsIncluded || []).join(', ')}]`);
    lines.push(`└─ formatting_time: ${m.l4.formattingTime}ms`);
    lines.push('');

    // Budget Summary
    lines.push('[Budget Summary]');
    lines.push(`├─ total_tokens: ${m.budget.total}`);
    lines.push(`├─ budget_limit: ${m.budget.limit}`);
    lines.push(`├─ utilization: ${m.budget.utilization}%`);
    lines.push(`└─ breakdown:`);
    const bd = m.budget.breakdown || {};
    lines.push(` ├─ constraints (L1): ${bd.constraints || 0}`);
    lines.push(` ├─ events (L2): ${bd.events || 0}`);
    // entities is part of the breakdown created by createMetrics(); without
    // this row the listed parts could not add up to total_tokens.
    lines.push(` ├─ entities: ${bd.entities || 0}`);
    lines.push(` ├─ chunks (L3): ${bd.chunks || 0}`);
    lines.push(` ├─ recent_orphans: ${bd.recentOrphans || 0}`);
    lines.push(` └─ arcs: ${bd.arcs || 0}`);
    lines.push('');

    // Timing
    lines.push('[Timing]');
    lines.push(`├─ query_expansion: ${m.timing.queryExpansion}ms`);
    lines.push(`├─ l0_search: ${m.timing.l0Search}ms`);
    lines.push(`├─ l1_constraints: ${m.timing.l1Constraints}ms`);
    lines.push(`├─ l2_retrieval: ${m.timing.l2Retrieval}ms`);
    lines.push(`├─ l3_retrieval: ${m.timing.l3Retrieval}ms`);
    if (m.timing.l3Rerank > 0) {
        lines.push(`├─ l3_rerank: ${m.timing.l3Rerank}ms`);
    }
    lines.push(`├─ l3_assembly: ${m.timing.l3Assembly}ms`);
    lines.push(`├─ l4_formatting: ${m.timing.l4Formatting}ms`);
    lines.push(`└─ total: ${m.timing.total}ms`);
    lines.push('');

    // Quality Indicators
    lines.push('[Quality Indicators]');
    lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`);
    lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`);
    lines.push(`├─ evidence_density: ${m.quality.evidenceDensity}%`);
    const issues = m.quality.potentialIssues;
    if (issues && issues.length > 0) {
        lines.push(`└─ potential_issues:`);
        issues.forEach((issue, i) => {
            const prefix = i === issues.length - 1 ? ' └─' : ' ├─';
            // Separate the branch glyph from the text, like every other row.
            lines.push(`${prefix} ${issue}`);
        });
    } else {
        lines.push(`└─ potential_issues: none`);
    }
    lines.push('');
    lines.push(rule);
    lines.push('');

    return lines.join('\n');
}
/**
 * Inspect a metrics object and describe anything that looks unhealthy.
 *
 * Checks run in a fixed order (event selection ratio, entity filter,
 * similarity, causal chain, L0 atoms, rerank, evidence density, budget,
 * timing) and each triggered check appends one message. The metrics object
 * itself is not modified.
 *
 * @param {object} metrics - Metrics object; the `l0`, `l2`, `l3`, `budget`
 *   and `timing` sections are read.
 * @returns {string[]} Human-readable issue descriptions; empty when nothing
 *   was flagged.
 */
export function detectIssues(metrics) {
    const { l0, l2, l3, budget, timing } = metrics;
    const found = [];
    const report = (message) => {
        found.push(message);
    };
    const asPercent = (ratio, digits) => (ratio * 100).toFixed(digits);

    // Share of considered events that ended up selected
    const considered = l2.eventsConsidered;
    if (considered > 0) {
        const selectedShare = l2.eventsSelected / considered;
        if (selectedShare < 0.1) {
            report(`Event selection ratio too low (${asPercent(selectedShare, 1)}%) - threshold may be too high`);
        }
        if (selectedShare > 0.6 && considered > 10) {
            report(`Event selection ratio high (${asPercent(selectedShare, 1)}%) - may include noise`);
        }
    }

    // Entity filter removed nothing, or nearly everything
    const entityFilter = l2.entityFilterStats;
    if (entityFilter) {
        const { before, filtered } = entityFilter;
        if (filtered === 0 && before > 10) {
            report(`No events filtered by entity - focus entities may be too broad or missing`);
        }
        if (before > 0 && filtered > before * 0.8) {
            report(`Too many events filtered (${filtered}/${before}) - focus may be too narrow`);
        }
    }

    // Weakest selected event is below the similarity floor
    const lowestSimilarity = l2.similarityDistribution?.min;
    if (lowestSimilarity > 0 && lowestSimilarity < 0.5) {
        report(`Low similarity events included (min=${lowestSimilarity})`);
    }

    // Events were selected, but none of them directly or causally
    const onlyContextEvents = l2.eventsSelected > 0
        && l2.causalEventsCount === 0
        && l2.byRecallType.direct === 0;
    if (onlyContextEvents) {
        report('No direct or causal events - query expansion may be inaccurate');
    }

    // No L0 atoms matched (a missing count is treated as zero)
    if (!l0.atomsMatched) {
        report('L0 atoms not matched - may need to generate anchors');
    }

    // Rerank health, only meaningful when rerank actually ran
    if (l3.rerankApplied) {
        const { beforeRerank, afterRerank, rerankTime } = l3;
        const scores = l3.rerankScoreDistribution;

        if (beforeRerank > 0 && afterRerank > 0) {
            const droppedShare = 1 - (afterRerank / beforeRerank);
            if (droppedShare > 0.7) {
                report(`High rerank filter ratio (${asPercent(droppedShare, 0)}%) - many irrelevant chunks removed`);
            }
        }
        if (scores) {
            if (scores.max < 0.5) {
                report(`Low rerank scores (max=${scores.max}) - query may be poorly matched`);
            }
            if (scores.mean < 0.3) {
                report(`Very low average rerank score (mean=${scores.mean}) - context may be weak`);
            }
        }
        if (rerankTime > 2000) {
            report(`Slow rerank (${rerankTime}ms) - may affect response time`);
        }
    }

    // Evidence density, based on the make-up of the selected chunks
    const selectedChunks = l3.chunksSelected;
    if (selectedChunks > 10 && l3.chunksSelectedByType) {
        const realShare = (l3.chunksSelectedByType.l1Real || 0) / selectedChunks;
        if (realShare < 0.3) {
            report(`Low L1 chunk ratio in selected (${asPercent(realShare, 0)}%) - may lack concrete evidence`);
        }
    }

    // Token budget nearly exhausted
    if (budget.utilization > 90) {
        report(`High budget utilization (${budget.utilization}%) - may be truncating content`);
    }

    // Overall latency
    if (timing.total > 5000) {
        report(`Slow recall (${timing.total}ms) - consider optimization`);
    }

    return found;
}

File diff suppressed because it is too large Load Diff