// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Metrics Collector
// Collects and formats recall-quality metrics
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Create an empty metrics object.
 *
 * Every call returns a fresh object graph (no arrays or nested objects are
 * shared between calls), with all counters zeroed, all lists empty and
 * `quality.constraintCoverage` starting at 100.
 *
 * @returns {object} Metrics skeleton with the sections `l0`, `l1`, `l2`, `l3`,
 *   `l4`, `budget`, `timing` and `quality`.
 */
export function createMetrics() {
  return {
    // L0 Query Understanding
    l0: {
      needRecall: false,
      intent: '',
      focusEntities: [],
      queries: [],
      implicitTopics: [],
      queryExpansionTime: 0,
      atomsMatched: 0,
      floorsHit: 0,
      topAtoms: [],
    },

    // L1 Constraints (Facts)
    l1: {
      factsTotal: 0,
      factsInjected: 0,
      factsFiltered: 0,
      tokens: 0,
      samples: [],
    },

    // L2 Narrative Retrieval
    l2: {
      eventsInStore: 0,
      eventsConsidered: 0,
      eventsSelected: 0,
      byRecallType: { direct: 0, causal: 0, context: 0 },
      similarityDistribution: { min: 0, max: 0, mean: 0, median: 0 },
      entityFilterStats: null,
      causalChainDepth: 0,
      causalEventsCount: 0,
      entitiesLoaded: 0,
      entityNames: [],
      retrievalTime: 0,
    },

    // L3 Evidence Assembly
    l3: {
      floorsFromL0: 0,
      // Candidate pool size (before rerank)
      chunksInRange: 0,
      chunksInRangeByType: { l0Virtual: 0, l1Real: 0 },
      // Final injection (after rerank + sparse)
      chunksSelected: 0,
      chunksSelectedByType: { l0Virtual: 0, l1Real: 0 },
      // Context pairing
      contextPairsAdded: 0,
      tokens: 0,
      assemblyTime: 0,
      // Rerank related
      rerankApplied: false,
      beforeRerank: 0,
      afterRerank: 0,
      rerankTime: 0,
      rerankScoreDistribution: null,
    },

    // L4 Formatting
    l4: {
      sectionsIncluded: [],
      formattingTime: 0,
    },

    // Budget Summary
    budget: {
      total: 0,
      limit: 0,
      utilization: 0,
      breakdown: {
        constraints: 0,
        events: 0,
        entities: 0,
        chunks: 0,
        recentOrphans: 0,
        arcs: 0,
      },
    },

    // Total Timing
    timing: {
      queryExpansion: 0,
      l0Search: 0,
      l1Constraints: 0,
      l2Retrieval: 0,
      l3Retrieval: 0,
      l3Rerank: 0,
      l3Assembly: 0,
      l4Formatting: 0,
      total: 0,
    },

    // Quality Indicators
    quality: {
      constraintCoverage: 100,
      eventPrecisionProxy: 0,
      evidenceDensity: 0,
      potentialIssues: [],
    },
  };
}

/**
 * Compute summary statistics for a list of similarity scores.
 *
 * Non-finite entries (NaN, ±Infinity, non-numbers) are ignored. The input is
 * never mutated. Each statistic is rounded to three decimal places.
 *
 * @param {ArrayLike<number>|null|undefined} similarities - Raw scores.
 * @returns {{min: number, max: number, mean: number, median: number}}
 *   All zeros when there is no usable score.
 */
export function calcSimilarityStats(similarities) {
  const empty = { min: 0, max: 0, mean: 0, median: 0 };
  if (!similarities?.length) {
    return empty;
  }

  // Copy before sorting so the caller's array keeps its order.
  const sorted = Array.from(similarities)
    .filter((value) => Number.isFinite(value))
    .sort((a, b) => a - b);
  if (sorted.length === 0) {
    return empty;
  }

  const round3 = (value) => Number(value.toFixed(3));
  const sum = sorted.reduce((acc, value) => acc + value, 0);

  // Even-sized samples have no single middle element: average the two
  // central values instead of picking the upper one.
  const mid = Math.floor(sorted.length / 2);
  const median = sorted.length % 2 === 0
    ? (sorted[mid - 1] + sorted[mid]) / 2
    : sorted[mid];

  return {
    min: round3(sorted[0]),
    max: round3(sorted[sorted.length - 1]),
    mean: round3(sum / sorted.length),
    median: round3(median),
  };
}

/**
 * Render a metrics object as a human-readable, tree-style log.
 *
 * Optional parts are only printed when present: the L0 details when
 * `l0.needRecall` is true, L1 samples when non-empty, the entity-filter and
 * similarity sub-trees when populated, rerank details when
 * `l3.rerankApplied` is true, and the `l3_rerank` timing when it is > 0.
 *
 * @param {object} metrics - Object shaped like the result of `createMetrics()`.
 * @returns {string} Multi-line report joined with `\n`.
 */
export function formatMetricsLog(metrics) {
  const m = metrics;
  const lines = [];

  lines.push('');
  lines.push('════════════════════════════════════════');
  lines.push(' Recall Metrics Report ');
  lines.push('════════════════════════════════════════');
  lines.push('');

  // L0 Query Understanding
  lines.push('[L0] Query Understanding');
  // need_recall is the last entry of the section when recall is skipped.
  lines.push(`${m.l0.needRecall ? '├─' : '└─'} need_recall: ${m.l0.needRecall}`);
  if (m.l0.needRecall) {
    lines.push(`├─ intent: ${m.l0.intent || 'mixed'}`);
    lines.push(`├─ focus_entities: [${(m.l0.focusEntities || []).join(', ')}]`);
    lines.push(`├─ queries: [${(m.l0.queries || []).slice(0, 3).join(', ')}]`);
    lines.push(`├─ query_expansion_time: ${m.l0.queryExpansionTime}ms`);
    lines.push(`├─ atoms_matched: ${m.l0.atomsMatched || 0}`);
    lines.push(`└─ floors_hit: ${m.l0.floorsHit || 0}`);
  }
  lines.push('');

  // L1 Constraints
  const samples = m.l1.samples || [];
  const hasSamples = samples.length > 0;
  lines.push('[L1] Constraints (Facts)');
  lines.push(`├─ facts_total: ${m.l1.factsTotal}`);
  lines.push(`├─ facts_filtered: ${m.l1.factsFiltered || 0}`);
  lines.push(`├─ facts_injected: ${m.l1.factsInjected}`);
  // tokens closes the section unless a samples line follows.
  lines.push(`${hasSamples ? '├─' : '└─'} tokens: ${m.l1.tokens}`);
  if (hasSamples) {
    lines.push(`└─ samples: "${samples.slice(0, 2).join('", "')}"`);
  }
  lines.push('');

  // L2 Narrative Retrieval
  lines.push('[L2] Narrative Retrieval');
  lines.push(`├─ events_in_store: ${m.l2.eventsInStore}`);
  lines.push(`├─ events_considered: ${m.l2.eventsConsidered}`);

  if (m.l2.entityFilterStats) {
    const ef = m.l2.entityFilterStats;
    lines.push(`├─ entity_filter:`);
    lines.push(`│ ├─ focus_entities: [${(ef.focusEntities || []).join(', ')}]`);
    lines.push(`│ ├─ before_filter: ${ef.before}`);
    lines.push(`│ ├─ after_filter: ${ef.after}`);
    lines.push(`│ └─ filtered_out: ${ef.filtered}`);
  }

  lines.push(`├─ events_selected: ${m.l2.eventsSelected}`);
  lines.push(`├─ by_recall_type:`);
  lines.push(`│ ├─ direct: ${m.l2.byRecallType.direct}`);
  lines.push(`│ ├─ causal: ${m.l2.byRecallType.causal}`);
  lines.push(`│ └─ context: ${m.l2.byRecallType.context}`);

  // A max of 0 means the distribution was never filled in.
  const sim = m.l2.similarityDistribution;
  if (sim && sim.max > 0) {
    lines.push(`├─ similarity_distribution:`);
    lines.push(`│ ├─ min: ${sim.min}`);
    lines.push(`│ ├─ max: ${sim.max}`);
    lines.push(`│ ├─ mean: ${sim.mean}`);
    lines.push(`│ └─ median: ${sim.median}`);
  }

  lines.push(`├─ causal_chain: depth=${m.l2.causalChainDepth}, events=${m.l2.causalEventsCount}`);
  lines.push(`├─ entities_loaded: ${m.l2.entitiesLoaded} [${(m.l2.entityNames || []).join(', ')}]`);
  lines.push(`└─ retrieval_time: ${m.l2.retrievalTime}ms`);
  lines.push('');

  // L3 Evidence Assembly
  lines.push('[L3] Evidence Assembly');
  lines.push(`├─ floors_from_l0: ${m.l3.floorsFromL0}`);

  // Candidate pool size
  lines.push(`├─ chunks_in_range: ${m.l3.chunksInRange}`);
  if (m.l3.chunksInRangeByType) {
    const cir = m.l3.chunksInRangeByType;
    lines.push(`│ ├─ l0_virtual: ${cir.l0Virtual || 0}`);
    lines.push(`│ └─ l1_real: ${cir.l1Real || 0}`);
  }

  // Rerank info
  if (m.l3.rerankApplied) {
    lines.push(`├─ rerank_applied: true`);
    lines.push(`│ ├─ before: ${m.l3.beforeRerank}`);
    lines.push(`│ ├─ after: ${m.l3.afterRerank}`);
    lines.push(`│ └─ time: ${m.l3.rerankTime}ms`);
    if (m.l3.rerankScoreDistribution) {
      const rd = m.l3.rerankScoreDistribution;
      lines.push(`├─ rerank_scores: min=${rd.min}, max=${rd.max}, mean=${rd.mean}`);
    }
  } else {
    lines.push(`├─ rerank_applied: false`);
  }

  // Final injection size
  lines.push(`├─ chunks_selected: ${m.l3.chunksSelected}`);
  if (m.l3.chunksSelectedByType) {
    const cs = m.l3.chunksSelectedByType;
    lines.push(`│ ├─ l0_virtual: ${cs.l0Virtual || 0}`);
    lines.push(`│ └─ l1_real: ${cs.l1Real || 0}`);
  }

  lines.push(`├─ context_pairs_added: ${m.l3.contextPairsAdded}`);
  lines.push(`├─ tokens: ${m.l3.tokens}`);
  lines.push(`└─ assembly_time: ${m.l3.assemblyTime}ms`);
  lines.push('');

  // L4 Formatting
  lines.push('[L4] Prompt Formatting');
  lines.push(`├─ sections: [${(m.l4.sectionsIncluded || []).join(', ')}]`);
  lines.push(`└─ formatting_time: ${m.l4.formattingTime}ms`);
  lines.push('');

  // Budget Summary
  lines.push('[Budget Summary]');
  lines.push(`├─ total_tokens: ${m.budget.total}`);
  lines.push(`├─ budget_limit: ${m.budget.limit}`);
  lines.push(`├─ utilization: ${m.budget.utilization}%`);
  lines.push(`└─ breakdown:`);
  const bd = m.budget.breakdown || {};
  lines.push(` ├─ constraints (L1): ${bd.constraints || 0}`);
  lines.push(` ├─ events (L2): ${bd.events || 0}`);
  // Entities are part of the breakdown produced by createMetrics(); list
  // them so the printed breakdown covers every tracked bucket.
  lines.push(` ├─ entities: ${bd.entities || 0}`);
  lines.push(` ├─ chunks (L3): ${bd.chunks || 0}`);
  lines.push(` ├─ recent_orphans: ${bd.recentOrphans || 0}`);
  lines.push(` └─ arcs: ${bd.arcs || 0}`);
  lines.push('');

  // Timing
  lines.push('[Timing]');
  lines.push(`├─ query_expansion: ${m.timing.queryExpansion}ms`);
  lines.push(`├─ l0_search: ${m.timing.l0Search}ms`);
  lines.push(`├─ l1_constraints: ${m.timing.l1Constraints}ms`);
  lines.push(`├─ l2_retrieval: ${m.timing.l2Retrieval}ms`);
  lines.push(`├─ l3_retrieval: ${m.timing.l3Retrieval}ms`);
  if (m.timing.l3Rerank > 0) {
    lines.push(`├─ l3_rerank: ${m.timing.l3Rerank}ms`);
  }
  lines.push(`├─ l3_assembly: ${m.timing.l3Assembly}ms`);
  lines.push(`├─ l4_formatting: ${m.timing.l4Formatting}ms`);
  lines.push(`└─ total: ${m.timing.total}ms`);
  lines.push('');

  // Quality Indicators
  const potentialIssues = m.quality.potentialIssues || [];
  lines.push('[Quality Indicators]');
  lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`);
  lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`);
  lines.push(`├─ evidence_density: ${m.quality.evidenceDensity}%`);

  if (potentialIssues.length > 0) {
    lines.push(`└─ potential_issues:`);
    potentialIssues.forEach((issue, i) => {
      const prefix = i === potentialIssues.length - 1 ? ' └─' : ' ├─';
      lines.push(`${prefix} ⚠ ${issue}`);
    });
  } else {
    lines.push(`└─ potential_issues: none`);
  }

  lines.push('');
  lines.push('════════════════════════════════════════');
  lines.push('');

  return lines.join('\n');
}

/**
 * Inspect a metrics object and describe anything that looks suspicious.
 *
 * Checks performed (thresholds are the literals used below):
 *  - event selection ratio below 10%, or above 60% with more than 10 candidates
 *  - entity filter removing nothing (with > 10 inputs) or more than 80%
 *  - minimum similarity between 0 (exclusive) and 0.5
 *  - selected events with no direct and no causal hits
 *  - zero matched L0 atoms
 *  - rerank dropping more than 70% of candidates, low rerank scores, or
 *    rerank slower than 2000 ms
 *  - fewer than 30% L1 chunks among more than 10 selected chunks
 *  - budget utilization above 90%
 *  - total recall time above 5000 ms
 *
 * The metrics object is only read, never modified.
 *
 * @param {object} metrics - Object shaped like the result of `createMetrics()`.
 * @returns {string[]} Human-readable warnings; empty when nothing was flagged.
 */
export function detectIssues(metrics) {
  const issues = [];
  const m = metrics;

  // Recall ratio
  if (m.l2.eventsConsidered > 0) {
    const selectRatio = m.l2.eventsSelected / m.l2.eventsConsidered;
    if (selectRatio < 0.1) {
      issues.push(`Event selection ratio too low (${(selectRatio * 100).toFixed(1)}%) - threshold may be too high`);
    }
    if (selectRatio > 0.6 && m.l2.eventsConsidered > 10) {
      issues.push(`Event selection ratio high (${(selectRatio * 100).toFixed(1)}%) - may include noise`);
    }
  }

  // Entity filtering
  if (m.l2.entityFilterStats) {
    const ef = m.l2.entityFilterStats;
    if (ef.filtered === 0 && ef.before > 10) {
      issues.push(`No events filtered by entity - focus entities may be too broad or missing`);
    }
    if (ef.before > 0 && ef.filtered > ef.before * 0.8) {
      issues.push(`Too many events filtered (${ef.filtered}/${ef.before}) - focus may be too narrow`);
    }
  }

  // Similarity: a min of 0 is the unpopulated default, so it is not flagged.
  const sim = m.l2.similarityDistribution;
  if (sim && sim.min > 0 && sim.min < 0.5) {
    issues.push(`Low similarity events included (min=${sim.min})`);
  }

  // Causal chain
  if (m.l2.eventsSelected > 0 && m.l2.causalEventsCount === 0 && m.l2.byRecallType.direct === 0) {
    issues.push('No direct or causal events - query expansion may be inaccurate');
  }

  // L0 atoms
  if ((m.l0.atomsMatched || 0) === 0) {
    issues.push('L0 atoms not matched - may need to generate anchors');
  }

  // Rerank
  if (m.l3.rerankApplied) {
    // `>= 0` (rather than `> 0`) so that rerank discarding every candidate,
    // the most extreme filter ratio, is reported too. A missing/undefined
    // afterRerank still fails the comparison and is skipped.
    if (m.l3.beforeRerank > 0 && m.l3.afterRerank >= 0) {
      const filterRatio = 1 - (m.l3.afterRerank / m.l3.beforeRerank);
      if (filterRatio > 0.7) {
        issues.push(`High rerank filter ratio (${(filterRatio * 100).toFixed(0)}%) - many irrelevant chunks removed`);
      }
    }

    if (m.l3.rerankScoreDistribution) {
      const rd = m.l3.rerankScoreDistribution;
      if (rd.max < 0.5) {
        issues.push(`Low rerank scores (max=${rd.max}) - query may be poorly matched`);
      }
      if (rd.mean < 0.3) {
        issues.push(`Very low average rerank score (mean=${rd.mean}) - context may be weak`);
      }
    }

    if (m.l3.rerankTime > 2000) {
      issues.push(`Slow rerank (${m.l3.rerankTime}ms) - may affect response time`);
    }
  }

  // Evidence density (based on the composition of the selected chunks)
  if (m.l3.chunksSelected > 0 && m.l3.chunksSelectedByType) {
    const l1Real = m.l3.chunksSelectedByType.l1Real || 0;
    const density = l1Real / m.l3.chunksSelected;
    if (density < 0.3 && m.l3.chunksSelected > 10) {
      issues.push(`Low L1 chunk ratio in selected (${(density * 100).toFixed(0)}%) - may lack concrete evidence`);
    }
  }

  // Budget
  if (m.budget.utilization > 90) {
    issues.push(`High budget utilization (${m.budget.utilization}%) - may be truncating content`);
  }

  // Performance
  if (m.timing.total > 5000) {
    issues.push(`Slow recall (${m.timing.total}ms) - consider optimization`);
  }

  return issues;
}