Files
LittleWhiteBox/modules/story-summary/vector/retrieval/metrics.js

531 lines
22 KiB
JavaScript
Raw Normal View History

// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Metrics Collector (v4 - Two-Stage: L0 Locate → L1 Evidence)
//
// Naming conventions:
// - The storage layer uses L0/L1/L2/L3 (StateAtom/Chunk/Event/Fact).
// - The metrics layer uses semantic names (anchor/evidence/event/constraint/arc).
//
// Architecture changes (v3 → v4):
// - The evidence section reflects the two-stage architecture: L0-only fusion, then L1 pulled per floor.
// - Removed mergedByType / selectedByType (there is no longer a mixed pool).
// - Added l0Candidates / l0Selected / l1Pulled / l1Attached / l1CosineTime.
// - The fusion section is explicitly marked L0-only (anchorCount removed).
// - quality.chunkRealRatio → quality.l1AttachRate
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Build a zero-initialised metrics object for a single recall run.
 *
 * Each call allocates new nested objects and arrays, so the returned value
 * can be mutated freely without affecting other metrics objects.
 *
 * @returns {object} Metrics object with one sub-object per pipeline stage.
 */
export function createMetrics() {
  // Query build: timings and character lengths of the query variants.
  const query = {
    buildTime: 0,
    refineTime: 0,
    lengths: {
      v0Chars: 0,
      v1Chars: null, // null means "not available" (NA)
      rerankChars: 0,
    },
  };

  // Anchor (L0 StateAtoms): semantic anchors.
  const anchor = {
    needRecall: false,
    focusEntities: [],
    matched: 0,
    floorsHit: 0,
    topHits: [],
  };

  // Lexical (MiniSearch): lexical retrieval.
  const lexical = {
    terms: [],
    atomHits: 0,
    chunkHits: 0,
    eventHits: 0,
    searchTime: 0,
  };

  // Fusion (W-RRF, L0-only): multi-path fusion.
  const fusion = {
    denseCount: 0,
    lexCount: 0,
    totalUnique: 0,
    afterCap: 0,
    time: 0,
  };

  // Constraint (L3 Facts): world constraints.
  const constraint = {
    total: 0,
    filtered: 0,
    injected: 0,
    tokens: 0,
    samples: [],
  };

  // Event (L2 Events): event summaries.
  const event = {
    inStore: 0,
    considered: 0,
    selected: 0,
    byRecallType: { direct: 0, related: 0, causal: 0, lexical: 0 },
    similarityDistribution: { min: 0, max: 0, mean: 0, median: 0 },
    entityFilter: null,
    causalChainDepth: 0,
    causalCount: 0,
    entitiesUsed: 0,
    entityNames: [],
  };

  // Evidence (two-stage: L0 rerank → L1 pull): original-text evidence.
  const evidence = {
    // Stage 1: L0
    l0Candidates: 0, // L0 candidates after W-RRF fusion
    l0Selected: 0, // L0 items selected after rerank
    rerankApplied: false,
    beforeRerank: 0,
    afterRerank: 0,
    rerankTime: 0,
    rerankScores: null,
    // Stage 2: L1
    l1Pulled: 0, // total L1 chunks pulled from the DB
    l1Attached: 0, // L1 chunks actually attached (top-1 × floors × 2 sides)
    l1CosineTime: 0, // time spent on L1 cosine scoring
    // Assembly
    contextPairsAdded: 0, // kept for compatibility (= USER-side count within l1Attached)
    tokens: 0,
    assemblyTime: 0,
  };

  // Arc: character arcs.
  const arc = {
    injected: 0,
    tokens: 0,
  };

  // Formatting.
  const formatting = {
    sectionsIncluded: [],
    time: 0,
  };

  // Budget summary.
  const budget = {
    total: 0,
    limit: 0,
    utilization: 0,
    breakdown: {
      constraints: 0,
      events: 0,
      distantEvidence: 0,
      recentEvidence: 0,
      arcs: 0,
    },
  };

  // Timing per pipeline stage.
  const timing = {
    queryBuild: 0,
    queryRefine: 0,
    anchorSearch: 0,
    lexicalSearch: 0,
    fusion: 0,
    constraintFilter: 0,
    eventRetrieval: 0,
    evidenceRetrieval: 0,
    evidenceRerank: 0,
    evidenceAssembly: 0,
    formatting: 0,
    total: 0,
  };

  // Quality indicators.
  const quality = {
    constraintCoverage: 100,
    eventPrecisionProxy: 0,
    l1AttachRate: 0, // share of L0 items that have an L1 attached
    potentialIssues: [],
  };

  return {
    query,
    anchor,
    lexical,
    fusion,
    constraint,
    event,
    evidence,
    arc,
    formatting,
    budget,
    timing,
    quality,
  };
}
/**
 * Compute min / max / mean / median for a list of similarity scores.
 *
 * - Entries that are not finite numbers (NaN, ±Infinity, undefined, strings…)
 *   are ignored, so one bad score cannot throw or turn the mean into NaN.
 * - For an even number of scores the median is the mean of the two middle
 *   values; for an odd number it is the middle value.
 * - Every result is rounded to three decimals.
 * - A missing or empty list (or one with no usable scores) yields all zeros.
 *
 * The input is never mutated.
 *
 * @param {number[]} similarities
 * @returns {{min: number, max: number, mean: number, median: number}}
 */
export function calcSimilarityStats(similarities) {
  const round3 = (value) => Number(value.toFixed(3));

  // Array.from keeps support for any iterable / array-like the caller passes.
  const sorted = Array.from(similarities ?? [])
    .filter((value) => Number.isFinite(value))
    .sort((a, b) => a - b);

  if (sorted.length === 0) {
    return { min: 0, max: 0, mean: 0, median: 0 };
  }

  const sum = sorted.reduce((acc, value) => acc + value, 0);
  const mid = Math.floor(sorted.length / 2);
  const median = sorted.length % 2 === 0
    ? (sorted[mid - 1] + sorted[mid]) / 2
    : sorted[mid];

  return {
    min: round3(sorted[0]),
    max: round3(sorted[sorted.length - 1]),
    mean: round3(sum / sorted.length),
    median: round3(median),
  };
}
/**
 * Render a metrics object as a human-readable, tree-style text report.
 *
 * Sections are emitted in a fixed order: query length, query build, anchor,
 * lexical, fusion, constraint, event, evidence, arc (only when
 * `arc.injected > 0`), formatting, budget, timing and quality.
 *
 * Tree connectors are kept well-formed: the last child of every section uses
 * `└─` even when optional trailing rows (anchor details, constraint samples)
 * are absent, and each potential-issue line has a space between its connector
 * and its text.
 *
 * @param {object} metrics - Metrics object shaped like the result of createMetrics().
 * @returns {string} The report lines joined with '\n'.
 */
export function formatMetricsLog(metrics) {
  const m = metrics;
  const lines = [];
  const rule = '════════════════════════════════════════';

  lines.push('');
  lines.push(rule);
  lines.push(' Recall Metrics Report (v4) ');
  lines.push(rule);
  lines.push('');

  // Query length
  const lengths = m.query?.lengths;
  lines.push('[Query Length] 查询长度');
  lines.push(`├─ query_v0_chars: ${lengths?.v0Chars ?? 0}`);
  lines.push(`├─ query_v1_chars: ${lengths?.v1Chars == null ? 'NA' : lengths.v1Chars}`);
  lines.push(`└─ rerank_query_chars: ${lengths?.rerankChars ?? 0}`);
  lines.push('');

  // Query build
  lines.push('[Query] 查询构建');
  lines.push(`├─ build_time: ${m.query.buildTime}ms`);
  lines.push(`└─ refine_time: ${m.query.refineTime}ms`);
  lines.push('');

  // Anchor (L0 StateAtoms)
  lines.push('[Anchor] L0 StateAtoms - 语义锚点');
  if (m.anchor.needRecall) {
    lines.push(`├─ need_recall: ${m.anchor.needRecall}`);
    lines.push(`├─ focus_entities: [${(m.anchor.focusEntities || []).join(', ')}]`);
    lines.push(`├─ matched: ${m.anchor.matched || 0}`);
    lines.push(`└─ floors_hit: ${m.anchor.floorsHit || 0}`);
  } else {
    // need_recall is the only row here, so it must close the tree.
    lines.push(`└─ need_recall: ${m.anchor.needRecall}`);
  }
  lines.push('');

  // Lexical (MiniSearch)
  lines.push('[Lexical] MiniSearch - 词法检索');
  lines.push(`├─ terms: [${(m.lexical.terms || []).slice(0, 8).join(', ')}]`);
  lines.push(`├─ atom_hits: ${m.lexical.atomHits}`);
  lines.push(`├─ chunk_hits: ${m.lexical.chunkHits}`);
  lines.push(`├─ event_hits: ${m.lexical.eventHits}`);
  lines.push(`└─ search_time: ${m.lexical.searchTime}ms`);
  lines.push('');

  // Fusion (W-RRF, L0-only)
  lines.push('[Fusion] W-RRF (L0-only) - 多路融合');
  lines.push(`├─ dense_count: ${m.fusion.denseCount}`);
  lines.push(`├─ lex_count: ${m.fusion.lexCount}`);
  lines.push(`├─ total_unique: ${m.fusion.totalUnique}`);
  lines.push(`├─ after_cap: ${m.fusion.afterCap}`);
  lines.push(`└─ time: ${m.fusion.time}ms`);
  lines.push('');

  // Constraint (L3 Facts)
  const hasSamples = Boolean(m.constraint.samples && m.constraint.samples.length > 0);
  lines.push('[Constraint] L3 Facts - 世界约束');
  lines.push(`├─ total: ${m.constraint.total}`);
  lines.push(`├─ filtered: ${m.constraint.filtered || 0}`);
  lines.push(`├─ injected: ${m.constraint.injected}`);
  // tokens closes the tree unless a samples row follows it.
  lines.push(`${hasSamples ? '├─' : '└─'} tokens: ${m.constraint.tokens}`);
  if (hasSamples) {
    // Only the first two samples are shown.
    lines.push(`└─ samples: "${m.constraint.samples.slice(0, 2).join('", "')}"`);
  }
  lines.push('');

  // Event (L2 Events)
  lines.push('[Event] L2 Events - 事件摘要');
  lines.push(`├─ in_store: ${m.event.inStore}`);
  lines.push(`├─ considered: ${m.event.considered}`);
  if (m.event.entityFilter) {
    const ef = m.event.entityFilter;
    lines.push(`├─ entity_filter:`);
    lines.push(`│ ├─ focus_entities: [${(ef.focusEntities || []).join(', ')}]`);
    lines.push(`│ ├─ before: ${ef.before}`);
    lines.push(`│ ├─ after: ${ef.after}`);
    lines.push(`│ └─ filtered: ${ef.filtered}`);
  }
  lines.push(`├─ selected: ${m.event.selected}`);
  lines.push(`├─ by_recall_type:`);
  lines.push(`│ ├─ direct: ${m.event.byRecallType.direct}`);
  lines.push(`│ ├─ related: ${m.event.byRecallType.related}`);
  lines.push(`│ ├─ causal: ${m.event.byRecallType.causal}`);
  lines.push(`│ └─ lexical: ${m.event.byRecallType.lexical}`);
  const sim = m.event.similarityDistribution;
  // The distribution is skipped while max is still 0 (or absent).
  if (sim && sim.max > 0) {
    lines.push(`├─ similarity_distribution:`);
    lines.push(`│ ├─ min: ${sim.min}`);
    lines.push(`│ ├─ max: ${sim.max}`);
    lines.push(`│ ├─ mean: ${sim.mean}`);
    lines.push(`│ └─ median: ${sim.median}`);
  }
  lines.push(`├─ causal_chain: depth=${m.event.causalChainDepth}, count=${m.event.causalCount}`);
  lines.push(`└─ entities_used: ${m.event.entitiesUsed} [${(m.event.entityNames || []).join(', ')}]`);
  lines.push('');

  // Evidence (two-stage)
  lines.push('[Evidence] Two-Stage: L0 Locate → L1 Pull');
  lines.push(`├─ Stage 1 (L0):`);
  lines.push(`│ ├─ candidates (post-fusion): ${m.evidence.l0Candidates}`);
  if (m.evidence.rerankApplied) {
    lines.push(`│ ├─ rerank_applied: true`);
    lines.push(`│ │ ├─ before: ${m.evidence.beforeRerank}`);
    lines.push(`│ │ ├─ after: ${m.evidence.afterRerank}`);
    lines.push(`│ │ └─ time: ${m.evidence.rerankTime}ms`);
    if (m.evidence.rerankScores) {
      const rs = m.evidence.rerankScores;
      lines.push(`│ ├─ rerank_scores: min=${rs.min}, max=${rs.max}, mean=${rs.mean}`);
    }
  } else {
    lines.push(`│ ├─ rerank_applied: false`);
  }
  lines.push(`│ └─ selected: ${m.evidence.l0Selected}`);
  lines.push(`├─ Stage 2 (L1):`);
  lines.push(`│ ├─ pulled: ${m.evidence.l1Pulled}`);
  lines.push(`│ ├─ attached: ${m.evidence.l1Attached}`);
  lines.push(`│ └─ cosine_time: ${m.evidence.l1CosineTime}ms`);
  lines.push(`├─ tokens: ${m.evidence.tokens}`);
  lines.push(`└─ assembly_time: ${m.evidence.assemblyTime}ms`);
  lines.push('');

  // Arc (only reported when something was injected)
  if (m.arc.injected > 0) {
    lines.push('[Arc] 人物弧光');
    lines.push(`├─ injected: ${m.arc.injected}`);
    lines.push(`└─ tokens: ${m.arc.tokens}`);
    lines.push('');
  }

  // Formatting
  lines.push('[Formatting] 格式化');
  lines.push(`├─ sections: [${(m.formatting.sectionsIncluded || []).join(', ')}]`);
  lines.push(`└─ time: ${m.formatting.time}ms`);
  lines.push('');

  // Budget summary
  const bd = m.budget.breakdown || {};
  lines.push('[Budget] 预算');
  lines.push(`├─ total_tokens: ${m.budget.total}`);
  lines.push(`├─ limit: ${m.budget.limit}`);
  lines.push(`├─ utilization: ${m.budget.utilization}%`);
  lines.push(`└─ breakdown:`);
  lines.push(` ├─ constraints: ${bd.constraints || 0}`);
  lines.push(` ├─ events: ${bd.events || 0}`);
  lines.push(` ├─ distant_evidence: ${bd.distantEvidence || 0}`);
  lines.push(` ├─ recent_evidence: ${bd.recentEvidence || 0}`);
  lines.push(` └─ arcs: ${bd.arcs || 0}`);
  lines.push('');

  // Timing. Query, lexical, fusion and L1-cosine times are read from their
  // own sections; the remaining rows come from m.timing.
  lines.push('[Timing] 计时');
  lines.push(`├─ query_build: ${m.query.buildTime}ms`);
  lines.push(`├─ query_refine: ${m.query.refineTime}ms`);
  lines.push(`├─ anchor_search: ${m.timing.anchorSearch}ms`);
  lines.push(`├─ lexical_search: ${m.lexical.searchTime}ms`);
  lines.push(`├─ fusion: ${m.fusion.time}ms`);
  lines.push(`├─ constraint_filter: ${m.timing.constraintFilter}ms`);
  lines.push(`├─ event_retrieval: ${m.timing.eventRetrieval}ms`);
  lines.push(`├─ evidence_retrieval: ${m.timing.evidenceRetrieval}ms`);
  if (m.timing.evidenceRerank > 0) {
    lines.push(`├─ evidence_rerank: ${m.timing.evidenceRerank}ms`);
  }
  lines.push(`├─ l1_cosine: ${m.evidence.l1CosineTime}ms`);
  lines.push(`├─ evidence_assembly: ${m.timing.evidenceAssembly}ms`);
  lines.push(`├─ formatting: ${m.timing.formatting}ms`);
  lines.push(`└─ total: ${m.timing.total}ms`);
  lines.push('');

  // Quality indicators
  const potentialIssues = m.quality.potentialIssues;
  lines.push('[Quality] 质量指标');
  lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`);
  lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`);
  lines.push(`├─ l1_attach_rate: ${m.quality.l1AttachRate}%`);
  if (potentialIssues && potentialIssues.length > 0) {
    lines.push(`└─ potential_issues:`);
    potentialIssues.forEach((issue, i) => {
      const connector = i === potentialIssues.length - 1 ? ' └─' : ' ├─';
      // Separate the connector from the text, like every other tree row.
      lines.push(`${connector} ${issue}`);
    });
  } else {
    lines.push(`└─ potential_issues: none`);
  }
  lines.push('');
  lines.push(rule);
  lines.push('');

  return lines.join('\n');
}
/**
 * Inspect a metrics object and return human-readable warnings about
 * suspicious retrieval behaviour.
 *
 * Checks run in a fixed order (query build, anchors, lexical search, fusion,
 * events, L0 rerank, L1 attachment, budget, performance), and the returned
 * messages follow that order. The metrics object is only read, never mutated.
 *
 * A missing `event.byRecallType` (or a missing `direct` / `related` count) is
 * treated as zero in every event check, so partially populated metrics do not
 * throw.
 *
 * @param {object} metrics - Metrics object shaped like the result of createMetrics().
 * @returns {string[]} One message per detected issue; empty when none apply.
 */
export function detectIssues(metrics) {
  const m = metrics;
  const issues = [];

  // ── Query build ──────────────────────────────────────────────────────────
  if ((m.anchor.focusEntities || []).length === 0) {
    issues.push('No focus entities extracted - entity lexicon may be empty or messages too short');
  }

  // ── Anchor matching ──────────────────────────────────────────────────────
  if ((m.anchor.matched || 0) === 0 && m.anchor.needRecall) {
    issues.push('No anchors matched - may need to generate anchors');
  }

  // ── Lexical search ───────────────────────────────────────────────────────
  if (
    (m.lexical.terms || []).length > 0 &&
    m.lexical.atomHits === 0 &&
    m.lexical.chunkHits === 0 &&
    m.lexical.eventHits === 0
  ) {
    issues.push('Lexical search returned zero hits - terms may not match any indexed content');
  }

  // ── Fusion (L0-only) ─────────────────────────────────────────────────────
  if (m.fusion.lexCount === 0 && m.fusion.denseCount > 0) {
    issues.push('No lexical L0 candidates in fusion - hybrid retrieval not contributing');
  }
  if (m.fusion.afterCap === 0) {
    issues.push('Fusion produced zero L0 candidates - all retrieval paths may have failed');
  }

  // ── Event recall ─────────────────────────────────────────────────────────
  // Read the recall-type counts once, defaulting to 0, so the ratio check and
  // the causal check below agree on how a missing count is handled.
  const recallTypes = m.event.byRecallType || {};
  const directCount = recallTypes.direct || 0;
  const relatedCount = recallTypes.related || 0;

  if (m.event.considered > 0) {
    const denseSelectRatio = (directCount + relatedCount) / m.event.considered;
    const ratioPercent = (denseSelectRatio * 100).toFixed(1);
    if (denseSelectRatio < 0.1) {
      issues.push(`Dense event selection ratio too low (${ratioPercent}%) - threshold may be too high`);
    }
    if (denseSelectRatio > 0.6 && m.event.considered > 10) {
      issues.push(`Dense event selection ratio high (${ratioPercent}%) - may include noise`);
    }
  }

  // Entity filter
  if (m.event.entityFilter) {
    const ef = m.event.entityFilter;
    if (ef.filtered === 0 && ef.before > 10) {
      issues.push('No events filtered by entity - focus entities may be too broad or missing');
    }
    if (ef.before > 0 && ef.filtered > ef.before * 0.8) {
      issues.push(`Too many events filtered (${ef.filtered}/${ef.before}) - focus may be too narrow`);
    }
  }

  // Similarity (a min of 0 is skipped by the > 0 guard)
  const sim = m.event.similarityDistribution;
  if (sim && sim.min > 0 && sim.min < 0.5) {
    issues.push(`Low similarity events included (min=${sim.min})`);
  }

  // Causal chain
  if (m.event.selected > 0 && m.event.causalCount === 0 && directCount === 0) {
    issues.push('No direct or causal events - query may not align with stored events');
  }

  // ── L0 rerank ────────────────────────────────────────────────────────────
  if (m.evidence.rerankApplied) {
    if (m.evidence.beforeRerank > 0 && m.evidence.afterRerank > 0) {
      const filterRatio = 1 - (m.evidence.afterRerank / m.evidence.beforeRerank);
      if (filterRatio > 0.7) {
        issues.push(`High L0 rerank filter ratio (${(filterRatio * 100).toFixed(0)}%) - many irrelevant L0 in fusion output`);
      }
    }
    if (m.evidence.rerankScores) {
      const rs = m.evidence.rerankScores;
      if (rs.max < 0.5) {
        issues.push(`Low L0 rerank scores (max=${rs.max}) - query may be poorly matched`);
      }
      if (rs.mean < 0.3) {
        issues.push(`Very low average L0 rerank score (mean=${rs.mean}) - context may be weak`);
      }
    }
    if (m.evidence.rerankTime > 2000) {
      issues.push(`Slow L0 rerank (${m.evidence.rerankTime}ms) - may affect response time`);
    }
  }

  // ── L1 attachment ────────────────────────────────────────────────────────
  if (m.evidence.l0Selected > 0 && m.evidence.l1Pulled === 0) {
    issues.push('Zero L1 chunks pulled - L1 vectors may not exist or DB read failed');
  }
  if (m.evidence.l0Selected > 0 && m.evidence.l1Attached === 0 && m.evidence.l1Pulled > 0) {
    issues.push('L1 chunks pulled but none attached - cosine scores may be too low or floor mismatch');
  }
  const l1AttachRate = m.quality.l1AttachRate || 0;
  if (m.evidence.l0Selected > 5 && l1AttachRate < 20) {
    issues.push(`Low L1 attach rate (${l1AttachRate}%) - many L0 lack concrete dialogue evidence`);
  }

  // ── Budget ───────────────────────────────────────────────────────────────
  if (m.budget.utilization > 90) {
    issues.push(`High budget utilization (${m.budget.utilization}%) - may be truncating content`);
  }

  // ── Performance ──────────────────────────────────────────────────────────
  if (m.timing.total > 8000) {
    issues.push(`Slow recall (${m.timing.total}ms) - consider optimization`);
  }
  if (m.query.buildTime > 100) {
    issues.push(`Slow query build (${m.query.buildTime}ms) - entity lexicon may be too large`);
  }
  if (m.evidence.l1CosineTime > 1000) {
    issues.push(`Slow L1 cosine scoring (${m.evidence.l1CosineTime}ms) - too many chunks pulled`);
  }

  return issues;
}