// LittleWhiteBox/modules/story-summary/vector/retrieval/metrics.js

// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Metrics Collector
// Collects and formats recall-quality metrics
// ═══════════════════════════════════════════════════════════════════════════

/**
 * Build a blank metrics record for a single recall run.
 *
 * Every call allocates brand-new nested objects and arrays, so the returned
 * record can be mutated freely without affecting any other record.
 *
 * @returns {object} Record with the sections `l0`..`l4`, `budget`, `timing`
 *   and `quality`, all counters zeroed and all lists empty.
 */
export function createMetrics() {
    // Small factories so that repeated shapes never share a reference.
    const emptyTypeCounts = () => ({ l0Virtual: 0, l1Real: 0 });

    // L0 Query Understanding
    const l0 = {
        needRecall: false,
        intent: '',
        focusEntities: [],
        queries: [],
        implicitTopics: [],
        queryExpansionTime: 0,
        atomsMatched: 0,
        floorsHit: 0,
        topAtoms: [],
    };

    // L1 Constraints (Facts)
    const l1 = {
        factsTotal: 0,
        factsInjected: 0,
        factsFiltered: 0,
        tokens: 0,
        samples: [],
    };

    // L2 Narrative Retrieval
    const l2 = {
        eventsInStore: 0,
        eventsConsidered: 0,
        eventsSelected: 0,
        byRecallType: { direct: 0, causal: 0, context: 0 },
        similarityDistribution: { min: 0, max: 0, mean: 0, median: 0 },
        entityFilterStats: null,
        causalChainDepth: 0,
        causalEventsCount: 0,
        entitiesLoaded: 0,
        entityNames: [],
        retrievalTime: 0,
    };

    // L3 Evidence Assembly
    const l3 = {
        floorsFromL0: 0,
        // Candidate pool size (before rerank)
        chunksInRange: 0,
        chunksInRangeByType: emptyTypeCounts(),
        // Final injected set (after rerank + sparse)
        chunksSelected: 0,
        chunksSelectedByType: emptyTypeCounts(),
        // Context pairing
        contextPairsAdded: 0,
        tokens: 0,
        assemblyTime: 0,
        // Rerank bookkeeping
        rerankApplied: false,
        beforeRerank: 0,
        afterRerank: 0,
        rerankTime: 0,
        rerankScoreDistribution: null,
    };

    // L4 Formatting
    const l4 = {
        sectionsIncluded: [],
        formattingTime: 0,
    };

    // Budget Summary
    const budget = {
        total: 0,
        limit: 0,
        utilization: 0,
        breakdown: {
            constraints: 0,
            events: 0,
            entities: 0,
            chunks: 0,
            recentOrphans: 0,
            arcs: 0,
        },
    };

    // Total Timing
    const timing = {
        queryExpansion: 0,
        l0Search: 0,
        l1Constraints: 0,
        l2Retrieval: 0,
        l3Retrieval: 0,
        l3Rerank: 0,
        l3Assembly: 0,
        l4Formatting: 0,
        total: 0,
    };

    // Quality Indicators
    const quality = {
        constraintCoverage: 100,
        eventPrecisionProxy: 0,
        evidenceDensity: 0,
        potentialIssues: [],
    };

    return { l0, l1, l2, l3, l4, budget, timing, quality };
}

/**
 * Summarise a list of similarity scores.
 *
 * Entries that are not finite numbers (`undefined`, `null`, `NaN`,
 * `Infinity`, strings, ...) are ignored so one bad score cannot break the
 * whole report. The input array is never mutated.
 *
 * @param {number[] | null | undefined} similarities - Raw similarity scores.
 * @returns {{min: number, max: number, mean: number, median: number}}
 *   Statistics rounded to 3 decimal places; all zeros when there is no
 *   usable score.
 */
export function calcSimilarityStats(similarities) {
    if (!similarities?.length) {
        return { min: 0, max: 0, mean: 0, median: 0 };
    }

    // Copy before sorting (sort mutates) and drop anything that is not a
    // finite number, which would otherwise corrupt the ordering or make
    // `toFixed` throw.
    const sorted = [...similarities]
        .filter((value) => Number.isFinite(value))
        .sort((a, b) => a - b);

    const count = sorted.length;
    if (count === 0) {
        return { min: 0, max: 0, mean: 0, median: 0 };
    }

    const round3 = (value) => Number(value.toFixed(3));
    const sum = sorted.reduce((acc, value) => acc + value, 0);

    // Odd count: the middle element. Even count: mean of the two middle
    // elements (taking only the upper one would bias the median upwards).
    const mid = Math.floor(count / 2);
    const median = count % 2 === 1
        ? sorted[mid]
        : (sorted[mid - 1] + sorted[mid]) / 2;

    return {
        min: round3(sorted[0]),
        max: round3(sorted[count - 1]),
        mean: round3(sum / count),
        median: round3(median),
    };
}

/**
 * Render a metrics record as a human-readable, tree-style text report.
 *
 * Each section is drawn as a small tree: `├─` marks an intermediate branch
 * and `└─` the last branch of the section. Sections whose trailing entries
 * are optional (L0 details, L1 samples) close the tree on whichever entry
 * ends up being last.
 *
 * @param {object} metrics - Metrics record with the sections `l0`..`l4`,
 *   `budget`, `timing` and `quality` (the shape built by `createMetrics`).
 * @returns {string} Multi-line report joined with `\n`.
 */
export function formatMetricsLog(metrics) {
    const m = metrics;
    const lines = [];

    lines.push('');
    lines.push('════════════════════════════════════════');
    lines.push('        Recall Metrics Report           ');
    lines.push('════════════════════════════════════════');
    lines.push('');

    // L0 Query Understanding
    lines.push('[L0] Query Understanding');
    if (m.l0.needRecall) {
        lines.push(`├─ need_recall: ${m.l0.needRecall}`);
        lines.push(`├─ intent: ${m.l0.intent || 'mixed'}`);
        lines.push(`├─ focus_entities: [${(m.l0.focusEntities || []).join(', ')}]`);
        // Only the first three queries are shown to keep the log compact.
        lines.push(`├─ queries: [${(m.l0.queries || []).slice(0, 3).join(', ')}]`);
        lines.push(`├─ query_expansion_time: ${m.l0.queryExpansionTime}ms`);
        lines.push(`├─ atoms_matched: ${m.l0.atomsMatched || 0}`);
        lines.push(`└─ floors_hit: ${m.l0.floorsHit || 0}`);
    } else {
        // Recall skipped: need_recall is the only (and therefore last) branch.
        lines.push(`└─ need_recall: ${m.l0.needRecall}`);
    }
    lines.push('');

    // L1 Constraints
    const samples = m.l1.samples || [];
    const hasSamples = samples.length > 0;
    lines.push('[L1] Constraints (Facts)');
    lines.push(`├─ facts_total: ${m.l1.factsTotal}`);
    lines.push(`├─ facts_filtered: ${m.l1.factsFiltered || 0}`);
    lines.push(`├─ facts_injected: ${m.l1.factsInjected}`);
    // tokens closes the tree unless a samples line follows.
    lines.push(`${hasSamples ? '├─' : '└─'} tokens: ${m.l1.tokens}`);
    if (hasSamples) {
        lines.push(`└─ samples: "${samples.slice(0, 2).join('", "')}"`);
    }
    lines.push('');

    // L2 Narrative Retrieval
    lines.push('[L2] Narrative Retrieval');
    lines.push(`├─ events_in_store: ${m.l2.eventsInStore}`);
    lines.push(`├─ events_considered: ${m.l2.eventsConsidered}`);

    if (m.l2.entityFilterStats) {
        const ef = m.l2.entityFilterStats;
        lines.push(`├─ entity_filter:`);
        lines.push(`│   ├─ focus_entities: [${(ef.focusEntities || []).join(', ')}]`);
        lines.push(`│   ├─ before_filter: ${ef.before}`);
        lines.push(`│   ├─ after_filter: ${ef.after}`);
        lines.push(`│   └─ filtered_out: ${ef.filtered}`);
    }

    lines.push(`├─ events_selected: ${m.l2.eventsSelected}`);
    lines.push(`├─ by_recall_type:`);
    lines.push(`│   ├─ direct: ${m.l2.byRecallType.direct}`);
    lines.push(`│   ├─ causal: ${m.l2.byRecallType.causal}`);
    lines.push(`│   └─ context: ${m.l2.byRecallType.context}`);

    // The distribution is printed only when its max is positive.
    const sim = m.l2.similarityDistribution;
    if (sim && sim.max > 0) {
        lines.push(`├─ similarity_distribution:`);
        lines.push(`│   ├─ min: ${sim.min}`);
        lines.push(`│   ├─ max: ${sim.max}`);
        lines.push(`│   ├─ mean: ${sim.mean}`);
        lines.push(`│   └─ median: ${sim.median}`);
    }

    lines.push(`├─ causal_chain: depth=${m.l2.causalChainDepth}, events=${m.l2.causalEventsCount}`);
    lines.push(`├─ entities_loaded: ${m.l2.entitiesLoaded} [${(m.l2.entityNames || []).join(', ')}]`);
    lines.push(`└─ retrieval_time: ${m.l2.retrievalTime}ms`);
    lines.push('');

    // L3 Evidence Assembly
    lines.push('[L3] Evidence Assembly');
    lines.push(`├─ floors_from_l0: ${m.l3.floorsFromL0}`);

    // Candidate pool size
    lines.push(`├─ chunks_in_range: ${m.l3.chunksInRange}`);
    if (m.l3.chunksInRangeByType) {
        const cir = m.l3.chunksInRangeByType;
        lines.push(`│   ├─ l0_virtual: ${cir.l0Virtual || 0}`);
        lines.push(`│   └─ l1_real: ${cir.l1Real || 0}`);
    }

    // Rerank details
    if (m.l3.rerankApplied) {
        lines.push(`├─ rerank_applied: true`);
        lines.push(`│   ├─ before: ${m.l3.beforeRerank}`);
        lines.push(`│   ├─ after: ${m.l3.afterRerank}`);
        lines.push(`│   └─ time: ${m.l3.rerankTime}ms`);
        if (m.l3.rerankScoreDistribution) {
            const rd = m.l3.rerankScoreDistribution;
            lines.push(`├─ rerank_scores: min=${rd.min}, max=${rd.max}, mean=${rd.mean}`);
        }
    } else {
        lines.push(`├─ rerank_applied: false`);
    }

    // Final injected size
    lines.push(`├─ chunks_selected: ${m.l3.chunksSelected}`);
    if (m.l3.chunksSelectedByType) {
        const cs = m.l3.chunksSelectedByType;
        lines.push(`│   ├─ l0_virtual: ${cs.l0Virtual || 0}`);
        lines.push(`│   └─ l1_real: ${cs.l1Real || 0}`);
    }

    lines.push(`├─ context_pairs_added: ${m.l3.contextPairsAdded}`);
    lines.push(`├─ tokens: ${m.l3.tokens}`);
    lines.push(`└─ assembly_time: ${m.l3.assemblyTime}ms`);
    lines.push('');

    // L4 Formatting
    lines.push('[L4] Prompt Formatting');
    lines.push(`├─ sections: [${(m.l4.sectionsIncluded || []).join(', ')}]`);
    lines.push(`└─ formatting_time: ${m.l4.formattingTime}ms`);
    lines.push('');

    // Budget Summary
    lines.push('[Budget Summary]');
    lines.push(`├─ total_tokens: ${m.budget.total}`);
    lines.push(`├─ budget_limit: ${m.budget.limit}`);
    lines.push(`├─ utilization: ${m.budget.utilization}%`);
    lines.push(`└─ breakdown:`);
    const bd = m.budget.breakdown || {};
    lines.push(`    ├─ constraints (L1): ${bd.constraints || 0}`);
    lines.push(`    ├─ events (L2): ${bd.events || 0}`);
    // `entities` is a field of the breakdown record and is reported with the rest.
    lines.push(`    ├─ entities: ${bd.entities || 0}`);
    lines.push(`    ├─ chunks (L3): ${bd.chunks || 0}`);
    lines.push(`    ├─ recent_orphans: ${bd.recentOrphans || 0}`);
    lines.push(`    └─ arcs: ${bd.arcs || 0}`);
    lines.push('');

    // Timing
    lines.push('[Timing]');
    lines.push(`├─ query_expansion: ${m.timing.queryExpansion}ms`);
    lines.push(`├─ l0_search: ${m.timing.l0Search}ms`);
    lines.push(`├─ l1_constraints: ${m.timing.l1Constraints}ms`);
    lines.push(`├─ l2_retrieval: ${m.timing.l2Retrieval}ms`);
    lines.push(`├─ l3_retrieval: ${m.timing.l3Retrieval}ms`);
    // The rerank timing line is omitted when no rerank time was recorded.
    if (m.timing.l3Rerank > 0) {
        lines.push(`├─ l3_rerank: ${m.timing.l3Rerank}ms`);
    }
    lines.push(`├─ l3_assembly: ${m.timing.l3Assembly}ms`);
    lines.push(`├─ l4_formatting: ${m.timing.l4Formatting}ms`);
    lines.push(`└─ total: ${m.timing.total}ms`);
    lines.push('');

    // Quality Indicators
    lines.push('[Quality Indicators]');
    lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`);
    lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`);
    lines.push(`├─ evidence_density: ${m.quality.evidenceDensity}%`);

    if (m.quality.potentialIssues && m.quality.potentialIssues.length > 0) {
        lines.push(`└─ potential_issues:`);
        m.quality.potentialIssues.forEach((issue, i) => {
            const prefix = i === m.quality.potentialIssues.length - 1 ? '   └─' : '   ├─';
            lines.push(`${prefix} ⚠ ${issue}`);
        });
    } else {
        lines.push(`└─ potential_issues: none`);
    }

    lines.push('');
    lines.push('════════════════════════════════════════');
    lines.push('');

    return lines.join('\n');
}

/**
 * Scan a metrics record for signs of a poor or slow recall run.
 *
 * Each check is independent; every triggered check appends one English
 * message, in a fixed order (event ratio, entity filter, similarity, causal
 * chain, L0 atoms, rerank, evidence density, budget, timing).
 *
 * @param {object} metrics - Metrics record; the `l0`, `l2`, `l3`, `budget`
 *   and `timing` sections are read.
 * @returns {string[]} Warning messages; empty when nothing looks wrong.
 */
export function detectIssues(metrics) {
    const { l0, l2, l3, budget, timing } = metrics;
    const found = [];
    const asPercent = (ratio, digits) => (ratio * 100).toFixed(digits);

    // Event selection ratio
    const considered = l2.eventsConsidered;
    if (considered > 0) {
        const picked = l2.eventsSelected / considered;
        if (picked < 0.1) {
            found.push(`Event selection ratio too low (${asPercent(picked, 1)}%) - threshold may be too high`);
        }
        if (picked > 0.6 && considered > 10) {
            found.push(`Event selection ratio high (${asPercent(picked, 1)}%) - may include noise`);
        }
    }

    // Entity filter
    const entityFilter = l2.entityFilterStats;
    if (entityFilter) {
        const { before, filtered } = entityFilter;
        if (filtered === 0 && before > 10) {
            found.push(`No events filtered by entity - focus entities may be too broad or missing`);
        }
        if (before > 0 && filtered > before * 0.8) {
            found.push(`Too many events filtered (${filtered}/${before}) - focus may be too narrow`);
        }
    }

    // Similarity (a min of 0 is treated as "no data" and skipped)
    const lowestSim = l2.similarityDistribution?.min;
    if (lowestSim > 0 && lowestSim < 0.5) {
        found.push(`Low similarity events included (min=${lowestSim})`);
    }

    // Causal chain
    if (l2.eventsSelected > 0 && l2.causalEventsCount === 0 && l2.byRecallType.direct === 0) {
        found.push('No direct or causal events - query expansion may be inaccurate');
    }

    // L0 atoms
    if (!l0.atomsMatched) {
        found.push('L0 atoms not matched - may need to generate anchors');
    }

    // Rerank
    if (l3.rerankApplied) {
        const { beforeRerank, afterRerank, rerankTime } = l3;
        const scores = l3.rerankScoreDistribution;

        if (beforeRerank > 0 && afterRerank > 0) {
            const dropped = 1 - (afterRerank / beforeRerank);
            if (dropped > 0.7) {
                found.push(`High rerank filter ratio (${asPercent(dropped, 0)}%) - many irrelevant chunks removed`);
            }
        }

        if (scores) {
            if (scores.max < 0.5) {
                found.push(`Low rerank scores (max=${scores.max}) - query may be poorly matched`);
            }
            if (scores.mean < 0.3) {
                found.push(`Very low average rerank score (mean=${scores.mean}) - context may be weak`);
            }
        }

        if (rerankTime > 2000) {
            found.push(`Slow rerank (${rerankTime}ms) - may affect response time`);
        }
    }

    // Evidence density (share of real L1 chunks among the selected ones)
    const selectedCount = l3.chunksSelected;
    const selectedByType = l3.chunksSelectedByType;
    if (selectedCount > 0 && selectedByType) {
        const realShare = (selectedByType.l1Real || 0) / selectedCount;
        if (realShare < 0.3 && selectedCount > 10) {
            found.push(`Low L1 chunk ratio in selected (${asPercent(realShare, 0)}%) - may lack concrete evidence`);
        }
    }

    // Budget
    if (budget.utilization > 90) {
        found.push(`High budget utilization (${budget.utilization}%) - may be truncating content`);
    }

    // Performance
    if (timing.total > 5000) {
        found.push(`Slow recall (${timing.total}ms) - consider optimization`);
    }

    return found;
}