feat(recall): add diffusion stage and improve retrieval metrics

This commit is contained in:
2026-02-12 15:36:07 +08:00
parent 111cd081f6
commit a646a70224
6 changed files with 1084 additions and 61 deletions

View File

@@ -78,7 +78,7 @@ export function createMetrics() {
inStore: 0,
considered: 0,
selected: 0,
byRecallType: { direct: 0, related: 0, causal: 0, lexical: 0 },
byRecallType: { direct: 0, related: 0, causal: 0, lexical: 0, l0Linked: 0 },
similarityDistribution: { min: 0, max: 0, mean: 0, median: 0 },
entityFilter: null,
causalChainDepth: 0,
@@ -112,6 +112,23 @@ export function createMetrics() {
assemblyTime: 0,
},
// Diffusion (PPR Spreading Activation) - 图扩散
diffusion: {
seedCount: 0,
graphNodes: 0,
graphEdges: 0,
iterations: 0,
convergenceError: 0,
pprActivated: 0,
cosineGatePassed: 0,
cosineGateFiltered: 0,
cosineGateNoVector: 0,
finalCount: 0,
scoreDistribution: { min: 0, max: 0, mean: 0 },
byChannel: { who: 0, what: 0, where: 0, how: 0 },
time: 0,
},
// Formatting - 格式化
formatting: {
sectionsIncluded: [],
@@ -140,6 +157,7 @@ export function createMetrics() {
evidenceRetrieval: 0,
evidenceRerank: 0,
evidenceAssembly: 0,
diffusion: 0,
formatting: 0,
total: 0,
},
@@ -249,9 +267,6 @@ export function formatMetricsLog(metrics) {
// Fusion (W-RRF, floor-level)
lines.push('[Fusion] W-RRF (floor-level) - 多路融合');
lines.push(`├─ dense_floors: ${m.fusion.denseFloors}`);
if (m.fusion.denseAggMethod) {
lines.push(`│ └─ aggregation: ${m.fusion.denseAggMethod}`);
}
lines.push(`├─ lex_floors: ${m.fusion.lexFloors}`);
if (m.fusion.lexDensityBonus > 0) {
lines.push(`│ └─ density_bonus: ${m.fusion.lexDensityBonus}`);
@@ -291,7 +306,12 @@ export function formatMetricsLog(metrics) {
lines.push(`│ ├─ direct: ${m.event.byRecallType.direct}`);
lines.push(`│ ├─ related: ${m.event.byRecallType.related}`);
lines.push(`│ ├─ causal: ${m.event.byRecallType.causal}`);
lines.push(`│ └─ lexical: ${m.event.byRecallType.lexical}`);
if (m.event.byRecallType.l0Linked) {
lines.push(`│ ├─ lexical: ${m.event.byRecallType.lexical}`);
lines.push(`│ └─ l0_linked: ${m.event.byRecallType.l0Linked}`);
} else {
lines.push(`│ └─ lexical: ${m.event.byRecallType.lexical}`);
}
const sim = m.event.similarityDistribution;
if (sim && sim.max > 0) {
@@ -340,6 +360,32 @@ export function formatMetricsLog(metrics) {
lines.push(`└─ assembly_time: ${m.evidence.assemblyTime}ms`);
lines.push('');
// Diffusion (PPR)
lines.push('[Diffusion] PPR Spreading Activation');
lines.push(`├─ seeds: ${m.diffusion.seedCount}`);
lines.push(`├─ graph: ${m.diffusion.graphNodes} nodes, ${m.diffusion.graphEdges} edges`);
if (m.diffusion.graphEdges > 0) {
const ch = m.diffusion.byChannel || {};
lines.push(`│ └─ by_channel: who=${ch.who || 0}, what=${ch.what || 0}, where=${ch.where || 0}, how=${ch.how || 0}`);
}
if (m.diffusion.iterations > 0) {
lines.push(`├─ ppr: ${m.diffusion.iterations} iterations, ε=${Number(m.diffusion.convergenceError).toExponential(1)}`);
}
lines.push(`├─ activated (excl seeds): ${m.diffusion.pprActivated}`);
if (m.diffusion.pprActivated > 0) {
lines.push(`├─ cosine_gate: ${m.diffusion.cosineGatePassed} passed, ${m.diffusion.cosineGateFiltered} filtered`);
if (m.diffusion.cosineGateNoVector > 0) {
lines.push(`│ └─ no_vector: ${m.diffusion.cosineGateNoVector}`);
}
}
lines.push(`├─ final_injected: ${m.diffusion.finalCount}`);
if (m.diffusion.finalCount > 0) {
const ds = m.diffusion.scoreDistribution;
lines.push(`├─ scores: min=${ds.min}, max=${ds.max}, mean=${ds.mean}`);
}
lines.push(`└─ time: ${m.diffusion.time}ms`);
lines.push('');
// Formatting
lines.push('[Formatting] 格式化');
lines.push(`├─ sections: [${(m.formatting.sectionsIncluded || []).join(', ')}]`);
@@ -372,6 +418,7 @@ export function formatMetricsLog(metrics) {
lines.push(`├─ evidence_retrieval: ${m.timing.evidenceRetrieval}ms`);
lines.push(`├─ floor_rerank: ${m.timing.evidenceRerank || 0}ms`);
lines.push(`├─ l1_cosine: ${m.evidence.l1CosineTime}ms`);
lines.push(`├─ diffusion: ${m.timing.diffusion}ms`);
lines.push(`├─ evidence_assembly: ${m.timing.evidenceAssembly}ms`);
lines.push(`├─ formatting: ${m.timing.formatting}ms`);
lines.push(`└─ total: ${m.timing.total}ms`);
@@ -578,5 +625,25 @@ export function detectIssues(metrics) {
issues.push(`Slow L1 cosine scoring (${m.evidence.l1CosineTime}ms) - too many chunks pulled`);
}
// ─────────────────────────────────────────────────────────────────
// Diffusion 问题
// ─────────────────────────────────────────────────────────────────
if (m.diffusion.graphEdges === 0 && m.diffusion.seedCount > 0) {
issues.push('No diffusion graph edges - atoms may lack who/edges fields');
}
if (m.diffusion.pprActivated > 0 && m.diffusion.cosineGatePassed === 0) {
issues.push('All PPR-activated nodes failed cosine gate - graph structure diverged from query semantics');
}
if (m.diffusion.cosineGateNoVector > 5) {
issues.push(`${m.diffusion.cosineGateNoVector} PPR nodes missing vectors - L0 vectorization may be incomplete`);
}
if (m.diffusion.time > 50) {
issues.push(`Slow diffusion (${m.diffusion.time}ms) - graph may be too dense`);
}
return issues;
}