feat(recall): clamp focus weight and adjust pending context window

This commit is contained in:
2026-02-11 17:21:04 +08:00
parent 297cc03770
commit 816196a710
3 changed files with 544 additions and 327 deletions

View File

@@ -1,16 +1,12 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Metrics Collector (v4 - Two-Stage: L0 Locate → L1 Evidence)
// Story Summary - Metrics Collector (v5 - Weighted Query + Floor Aggregation)
//
// 命名规范：
// - 存储层用 L0/L1/L2/L3（StateAtom/Chunk/Event/Fact）
// - 指标层用语义名称：anchor/evidence/event/constraint/arc
//
// 架构变更v3 → v4
// - evidence 区块反映 L0-only 融合 + L1 按楼层拉取的两阶段架构
// - 删除 mergedByType / selectedByType不再有混合池
// - 新增 floorCandidates / floorsSelected / l0Collected / l1Pulled / l1Attached / l1CosineTime
// - fusion 区块明确标注 L0-only删除 anchorCount
// - quality.chunkRealRatio → quality.l1AttachRate
// v4 → v5 变更
// - query: 新增 segmentWeights / r2Weights（加权向量诊断）
// - fusion: 新增 denseAggMethod / lexDensityBonus（聚合策略可观测）
// - quality: 新增 rerankRetentionRate（粗排-精排一致性）
// - 移除 timing 中从未写入的死字段（queryBuild/queryRefine/lexicalSearch/fusion）
// - 移除从未写入的 arc 区块
// ═══════════════════════════════════════════════════════════════════════════
/**
@@ -25,9 +21,11 @@ export function createMetrics() {
refineTime: 0,
lengths: {
v0Chars: 0,
v1Chars: null, // null = NA
v1Chars: null, // null = 无 hints
rerankChars: 0,
},
segmentWeights: [], // R1 归一化后权重 [context..., focus]
r2Weights: null, // R2 归一化后权重 [context..., focus, hints]null = 无 hints
},
// Anchor (L0 StateAtoms) - 语义锚点
@@ -55,6 +53,8 @@ export function createMetrics() {
totalUnique: 0,
afterCap: 0,
time: 0,
denseAggMethod: '', // 聚合方法描述(如 "max×0.6+mean×0.4"
lexDensityBonus: 0, // 密度加成系数
},
// Constraint (L3 Facts) - 世界约束
@@ -83,34 +83,28 @@ export function createMetrics() {
// Evidence (Two-Stage: Floor rerank → L1 pull) - 原文证据
evidence: {
// Stage 1: Floor
floorCandidates: 0, // W-RRF 融合后的 floor 候选数
floorsSelected: 0, // rerank 后选中的 floor 数
l0Collected: 0, // 选中 floor 中收集的 L0 atom 总数
floorCandidates: 0,
floorsSelected: 0,
l0Collected: 0,
rerankApplied: false,
rerankFailed: false,
beforeRerank: 0,
afterRerank: 0,
rerankTime: 0,
rerankScores: null,
rerankDocAvgLength: 0, // rerank document 平均字符数
rerankDocAvgLength: 0,
// Stage 2: L1
l1Pulled: 0, // 从 DB 拉取的 L1 chunk 总数
l1Attached: 0, // 实际挂载的 L1 数top-1 × floor × 2侧
l1CosineTime: 0, // L1 cosine 打分耗时
l1Pulled: 0,
l1Attached: 0,
l1CosineTime: 0,
// 装配
contextPairsAdded: 0, // USER 侧挂载数量
contextPairsAdded: 0,
tokens: 0,
assemblyTime: 0,
},
// Arc - 人物弧光
arc: {
injected: 0,
tokens: 0,
},
// Formatting - 格式化
formatting: {
sectionsIncluded: [],
@@ -131,13 +125,9 @@ export function createMetrics() {
},
},
// Timing - 计时
// Timing - 计时(仅包含实际写入的字段)
timing: {
queryBuild: 0,
queryRefine: 0,
anchorSearch: 0,
lexicalSearch: 0,
fusion: 0,
constraintFilter: 0,
eventRetrieval: 0,
evidenceRetrieval: 0,
@@ -151,7 +141,8 @@ export function createMetrics() {
quality: {
constraintCoverage: 100,
eventPrecisionProxy: 0,
l1AttachRate: 0, // 有 L1 挂载的 floor 占比
l1AttachRate: 0,
rerankRetentionRate: 0,
potentialIssues: [],
},
};
@@ -178,6 +169,16 @@ export function calcSimilarityStats(similarities) {
};
}
/**
 * Render a list of weights as a compact bracketed string for log output.
 *
 * Numeric entries are printed with three decimal places; any non-number
 * entry is passed through String() unchanged.
 *
 * @param {number[]|null} weights - Weights to render; null, undefined or an
 *   empty list produce the placeholder 'N/A'.
 * @returns {string} Something like "[0.250, 0.750]", or 'N/A' when there is
 *   nothing to show.
 */
function fmtWeights(weights) {
if (!weights?.length) {
return 'N/A';
}

// Format a single entry: fixed 3-decimal notation for numbers only.
const renderEntry = (value) => {
if (typeof value === 'number') {
return value.toFixed(3);
}
return String(value);
};

const joined = weights.map(renderEntry).join(', ');
return `[${joined}]`;
}
/**
* 格式化指标为可读日志
* @param {object} metrics
@@ -189,21 +190,27 @@ export function formatMetricsLog(metrics) {
lines.push('');
lines.push('════════════════════════════════════════');
lines.push(' Recall Metrics Report (v4) ');
lines.push(' Recall Metrics Report (v5) ');
lines.push('════════════════════════════════════════');
lines.push('');
// Query Length
lines.push('[Query Length] 查询长度');
lines.push(`├─ query_v0_chars: ${m.query?.lengths?.v0Chars ?? 0}`);
lines.push(`├─ query_v1_chars: ${m.query?.lengths?.v1Chars == null ? 'NA' : m.query.lengths.v1Chars}`);
lines.push(`├─ query_v1_chars: ${m.query?.lengths?.v1Chars == null ? 'N/A' : m.query.lengths.v1Chars}`);
lines.push(`└─ rerank_query_chars: ${m.query?.lengths?.rerankChars ?? 0}`);
lines.push('');
// Query Build
lines.push('[Query] 查询构建');
lines.push(`├─ build_time: ${m.query.buildTime}ms`);
lines.push(`─ refine_time: ${m.query.refineTime}ms`);
lines.push(`─ refine_time: ${m.query.refineTime}ms`);
lines.push(`├─ r1_weights: ${fmtWeights(m.query.segmentWeights)}`);
if (m.query.r2Weights) {
lines.push(`└─ r2_weights: ${fmtWeights(m.query.r2Weights)}`);
} else {
lines.push(`└─ r2_weights: N/A (no hints)`);
}
lines.push('');
// Anchor (L0 StateAtoms)
@@ -228,7 +235,13 @@ export function formatMetricsLog(metrics) {
// Fusion (W-RRF, floor-level)
lines.push('[Fusion] W-RRF (floor-level) - 多路融合');
lines.push(`├─ dense_floors: ${m.fusion.denseFloors}`);
if (m.fusion.denseAggMethod) {
lines.push(`│ └─ aggregation: ${m.fusion.denseAggMethod}`);
}
lines.push(`├─ lex_floors: ${m.fusion.lexFloors}`);
if (m.fusion.lexDensityBonus > 0) {
lines.push(`│ └─ density_bonus: ${m.fusion.lexDensityBonus}`);
}
lines.push(`├─ total_unique: ${m.fusion.totalUnique}`);
lines.push(`├─ after_cap: ${m.fusion.afterCap}`);
lines.push(`└─ time: ${m.fusion.time}ms`);
@@ -313,14 +326,6 @@ export function formatMetricsLog(metrics) {
lines.push(`└─ assembly_time: ${m.evidence.assemblyTime}ms`);
lines.push('');
// Arc
if (m.arc.injected > 0) {
lines.push('[Arc] 人物弧光');
lines.push(`├─ injected: ${m.arc.injected}`);
lines.push(`└─ tokens: ${m.arc.tokens}`);
lines.push('');
}
// Formatting
lines.push('[Formatting] 格式化');
lines.push(`├─ sections: [${(m.formatting.sectionsIncluded || []).join(', ')}]`);
@@ -363,6 +368,7 @@ export function formatMetricsLog(metrics) {
lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`);
lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`);
lines.push(`├─ l1_attach_rate: ${m.quality.l1AttachRate}%`);
lines.push(`├─ rerank_retention_rate: ${m.quality.rerankRetentionRate}%`);
if (m.quality.potentialIssues && m.quality.potentialIssues.length > 0) {
lines.push(`└─ potential_issues:`);
@@ -398,6 +404,19 @@ export function detectIssues(metrics) {
issues.push('No focus entities extracted - entity lexicon may be empty or messages too short');
}
// 权重极端退化检测
const segWeights = m.query.segmentWeights || [];
if (segWeights.length > 0) {
const focusWeight = segWeights[segWeights.length - 1] || 0;
if (focusWeight < 0.15) {
issues.push(`Focus segment weight very low (${(focusWeight * 100).toFixed(0)}%) - focus message may be too short`);
}
const allLow = segWeights.every(w => w < 0.1);
if (allLow) {
issues.push('All segment weights below 10% - all messages may be extremely short');
}
}
// ─────────────────────────────────────────────────────────────────
// 锚点匹配问题
// ─────────────────────────────────────────────────────────────────
@@ -494,6 +513,16 @@ export function detectIssues(metrics) {
}
}
// Rerank 保留率
const retentionRate = m.evidence.floorCandidates > 0
? Math.round(m.evidence.floorsSelected / m.evidence.floorCandidates * 100)
: 0;
m.quality.rerankRetentionRate = retentionRate;
if (m.evidence.floorCandidates > 0 && retentionRate < 25) {
issues.push(`Low rerank retention rate (${retentionRate}%) - fusion ranking poorly aligned with reranker`);
}
// ─────────────────────────────────────────────────────────────────
// L1 挂载问题
// ─────────────────────────────────────────────────────────────────