Fix lint warnings and update retrieval modules
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Story Summary - Metrics Collector (v2 - 统一命名)
|
||||
// Story Summary - Metrics Collector (v3 - Deterministic Query + Hybrid + W-RRF)
|
||||
//
|
||||
// 命名规范:
|
||||
// - 存储层用 L0/L1/L2/L3(StateAtom/Chunk/Event/Fact)
|
||||
@@ -8,21 +8,44 @@
|
||||
|
||||
/**
|
||||
* 创建空的指标对象
|
||||
* @returns {object} 指标对象
|
||||
* @returns {object}
|
||||
*/
|
||||
export function createMetrics() {
|
||||
return {
|
||||
// Query Build - 查询构建
|
||||
query: {
|
||||
buildTime: 0,
|
||||
refineTime: 0,
|
||||
},
|
||||
|
||||
// Anchor (L0 StateAtoms) - 语义锚点
|
||||
anchor: {
|
||||
needRecall: false,
|
||||
focusEntities: [],
|
||||
queries: [],
|
||||
queryExpansionTime: 0,
|
||||
matched: 0,
|
||||
floorsHit: 0,
|
||||
topHits: [],
|
||||
},
|
||||
|
||||
// Lexical (MiniSearch) - 词法检索
|
||||
lexical: {
|
||||
terms: [],
|
||||
atomHits: 0,
|
||||
chunkHits: 0,
|
||||
eventHits: 0,
|
||||
searchTime: 0,
|
||||
},
|
||||
|
||||
// Fusion (W-RRF) - 多路融合
|
||||
fusion: {
|
||||
denseCount: 0,
|
||||
lexCount: 0,
|
||||
anchorCount: 0,
|
||||
totalUnique: 0,
|
||||
afterCap: 0,
|
||||
time: 0,
|
||||
},
|
||||
|
||||
// Constraint (L3 Facts) - 世界约束
|
||||
constraint: {
|
||||
total: 0,
|
||||
@@ -37,7 +60,7 @@ export function createMetrics() {
|
||||
inStore: 0,
|
||||
considered: 0,
|
||||
selected: 0,
|
||||
byRecallType: { direct: 0, related: 0, causal: 0 },
|
||||
byRecallType: { direct: 0, related: 0, causal: 0, lexical: 0 },
|
||||
similarityDistribution: { min: 0, max: 0, mean: 0, median: 0 },
|
||||
entityFilter: null,
|
||||
causalChainDepth: 0,
|
||||
@@ -50,7 +73,7 @@ export function createMetrics() {
|
||||
evidence: {
|
||||
floorsFromAnchors: 0,
|
||||
chunkTotal: 0,
|
||||
chunkAfterCoarse: 0,
|
||||
denseCoarse: 0,
|
||||
merged: 0,
|
||||
mergedByType: { anchorVirtual: 0, chunkReal: 0 },
|
||||
selected: 0,
|
||||
@@ -93,8 +116,11 @@ export function createMetrics() {
|
||||
|
||||
// Timing - 计时
|
||||
timing: {
|
||||
queryExpansion: 0,
|
||||
queryBuild: 0,
|
||||
queryRefine: 0,
|
||||
anchorSearch: 0,
|
||||
lexicalSearch: 0,
|
||||
fusion: 0,
|
||||
constraintFilter: 0,
|
||||
eventRetrieval: 0,
|
||||
evidenceRetrieval: 0,
|
||||
@@ -109,6 +135,7 @@ export function createMetrics() {
|
||||
constraintCoverage: 100,
|
||||
eventPrecisionProxy: 0,
|
||||
evidenceDensity: 0,
|
||||
chunkRealRatio: 0,
|
||||
potentialIssues: [],
|
||||
},
|
||||
};
|
||||
@@ -116,7 +143,7 @@ export function createMetrics() {
|
||||
|
||||
/**
|
||||
* 计算相似度分布统计
|
||||
* @param {number[]} similarities - 相似度数组
|
||||
* @param {number[]} similarities
|
||||
* @returns {{min: number, max: number, mean: number, median: number}}
|
||||
*/
|
||||
export function calcSimilarityStats(similarities) {
|
||||
@@ -137,8 +164,8 @@ export function calcSimilarityStats(similarities) {
|
||||
|
||||
/**
|
||||
* 格式化指标为可读日志
|
||||
* @param {object} metrics - 指标对象
|
||||
* @returns {string} 格式化后的日志
|
||||
* @param {object} metrics
|
||||
* @returns {string}
|
||||
*/
|
||||
export function formatMetricsLog(metrics) {
|
||||
const m = metrics;
|
||||
@@ -150,18 +177,41 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push('════════════════════════════════════════');
|
||||
lines.push('');
|
||||
|
||||
// Query Build
|
||||
lines.push('[Query] 查询构建');
|
||||
lines.push(`├─ build_time: ${m.query.buildTime}ms`);
|
||||
lines.push(`└─ refine_time: ${m.query.refineTime}ms`);
|
||||
lines.push('');
|
||||
|
||||
// Anchor (L0 StateAtoms)
|
||||
lines.push('[Anchor] L0 StateAtoms - 语义锚点');
|
||||
lines.push(`├─ need_recall: ${m.anchor.needRecall}`);
|
||||
if (m.anchor.needRecall) {
|
||||
lines.push(`├─ focus_entities: [${(m.anchor.focusEntities || []).join(', ')}]`);
|
||||
lines.push(`├─ queries: [${(m.anchor.queries || []).slice(0, 3).join(', ')}]`);
|
||||
lines.push(`├─ query_expansion_time: ${m.anchor.queryExpansionTime}ms`);
|
||||
lines.push(`├─ matched: ${m.anchor.matched || 0}`);
|
||||
lines.push(`└─ floors_hit: ${m.anchor.floorsHit || 0}`);
|
||||
}
|
||||
lines.push('');
|
||||
|
||||
// Lexical (MiniSearch)
|
||||
lines.push('[Lexical] MiniSearch - 词法检索');
|
||||
lines.push(`├─ terms: [${(m.lexical.terms || []).slice(0, 8).join(', ')}]`);
|
||||
lines.push(`├─ atom_hits: ${m.lexical.atomHits}`);
|
||||
lines.push(`├─ chunk_hits: ${m.lexical.chunkHits}`);
|
||||
lines.push(`├─ event_hits: ${m.lexical.eventHits}`);
|
||||
lines.push(`└─ search_time: ${m.lexical.searchTime}ms`);
|
||||
lines.push('');
|
||||
|
||||
// Fusion (W-RRF)
|
||||
lines.push('[Fusion] W-RRF - 多路融合');
|
||||
lines.push(`├─ dense_count: ${m.fusion.denseCount}`);
|
||||
lines.push(`├─ lex_count: ${m.fusion.lexCount}`);
|
||||
lines.push(`├─ anchor_count: ${m.fusion.anchorCount}`);
|
||||
lines.push(`├─ total_unique: ${m.fusion.totalUnique}`);
|
||||
lines.push(`├─ after_cap: ${m.fusion.afterCap}`);
|
||||
lines.push(`└─ time: ${m.fusion.time}ms`);
|
||||
lines.push('');
|
||||
|
||||
// Constraint (L3 Facts)
|
||||
lines.push('[Constraint] L3 Facts - 世界约束');
|
||||
lines.push(`├─ total: ${m.constraint.total}`);
|
||||
@@ -191,7 +241,8 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push(`├─ by_recall_type:`);
|
||||
lines.push(`│ ├─ direct: ${m.event.byRecallType.direct}`);
|
||||
lines.push(`│ ├─ related: ${m.event.byRecallType.related}`);
|
||||
lines.push(`│ └─ causal: ${m.event.byRecallType.causal}`);
|
||||
lines.push(`│ ├─ causal: ${m.event.byRecallType.causal}`);
|
||||
lines.push(`│ └─ lexical: ${m.event.byRecallType.lexical}`);
|
||||
|
||||
const sim = m.event.similarityDistribution;
|
||||
if (sim && sim.max > 0) {
|
||||
@@ -210,12 +261,9 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push('[Evidence] L1 Chunks - 原文证据');
|
||||
lines.push(`├─ floors_from_anchors: ${m.evidence.floorsFromAnchors}`);
|
||||
|
||||
// 粗筛信息
|
||||
if (m.evidence.chunkTotal > 0) {
|
||||
lines.push(`├─ coarse_filter:`);
|
||||
lines.push(`│ ├─ total: ${m.evidence.chunkTotal}`);
|
||||
lines.push(`│ ├─ after: ${m.evidence.chunkAfterCoarse}`);
|
||||
lines.push(`│ └─ filtered: ${m.evidence.chunkTotal - m.evidence.chunkAfterCoarse}`);
|
||||
lines.push(`├─ chunk_total: ${m.evidence.chunkTotal}`);
|
||||
lines.push(`├─ dense_coarse: ${m.evidence.denseCoarse}`);
|
||||
}
|
||||
|
||||
lines.push(`├─ merged: ${m.evidence.merged}`);
|
||||
@@ -225,7 +273,6 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push(`│ └─ chunk_real: ${mt.chunkReal || 0}`);
|
||||
}
|
||||
|
||||
// Rerank 信息
|
||||
if (m.evidence.rerankApplied) {
|
||||
lines.push(`├─ rerank_applied: true`);
|
||||
lines.push(`│ ├─ before: ${m.evidence.beforeRerank}`);
|
||||
@@ -281,8 +328,11 @@ export function formatMetricsLog(metrics) {
|
||||
|
||||
// Timing
|
||||
lines.push('[Timing] 计时');
|
||||
lines.push(`├─ query_expansion: ${m.timing.queryExpansion}ms`);
|
||||
lines.push(`├─ query_build: ${m.query.buildTime}ms`);
|
||||
lines.push(`├─ query_refine: ${m.query.refineTime}ms`);
|
||||
lines.push(`├─ anchor_search: ${m.timing.anchorSearch}ms`);
|
||||
lines.push(`├─ lexical_search: ${m.lexical.searchTime}ms`);
|
||||
lines.push(`├─ fusion: ${m.fusion.time}ms`);
|
||||
lines.push(`├─ constraint_filter: ${m.timing.constraintFilter}ms`);
|
||||
lines.push(`├─ event_retrieval: ${m.timing.eventRetrieval}ms`);
|
||||
lines.push(`├─ evidence_retrieval: ${m.timing.evidenceRetrieval}ms`);
|
||||
@@ -299,6 +349,7 @@ export function formatMetricsLog(metrics) {
|
||||
lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`);
|
||||
lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`);
|
||||
lines.push(`├─ evidence_density: ${m.quality.evidenceDensity}%`);
|
||||
lines.push(`├─ chunk_real_ratio: ${m.quality.chunkRealRatio}%`);
|
||||
|
||||
if (m.quality.potentialIssues && m.quality.potentialIssues.length > 0) {
|
||||
lines.push(`└─ potential_issues:`);
|
||||
@@ -319,14 +370,53 @@ export function formatMetricsLog(metrics) {
|
||||
|
||||
/**
|
||||
* 检测潜在问题
|
||||
* @param {object} metrics - 指标对象
|
||||
* @returns {string[]} 问题列表
|
||||
* @param {object} metrics
|
||||
* @returns {string[]}
|
||||
*/
|
||||
export function detectIssues(metrics) {
|
||||
const issues = [];
|
||||
const m = metrics;
|
||||
|
||||
// 事件召回比例问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 查询构建问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if ((m.anchor.focusEntities || []).length === 0) {
|
||||
issues.push('No focus entities extracted - entity lexicon may be empty or messages too short');
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 锚点匹配问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if ((m.anchor.matched || 0) === 0 && m.anchor.needRecall) {
|
||||
issues.push('No anchors matched - may need to generate anchors');
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 词法检索问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if ((m.lexical.terms || []).length > 0 && m.lexical.atomHits === 0 && m.lexical.chunkHits === 0 && m.lexical.eventHits === 0) {
|
||||
issues.push('Lexical search returned zero hits - terms may not match any indexed content');
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 融合问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if (m.fusion.lexCount === 0 && m.fusion.denseCount > 0) {
|
||||
issues.push('No lexical candidates in fusion - hybrid retrieval not contributing');
|
||||
}
|
||||
|
||||
if (m.fusion.afterCap === 0) {
|
||||
issues.push('Fusion produced zero candidates - all retrieval paths may have failed');
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 事件召回问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if (m.event.considered > 0) {
|
||||
const selectRatio = m.event.selected / m.event.considered;
|
||||
if (selectRatio < 0.1) {
|
||||
@@ -341,7 +431,7 @@ export function detectIssues(metrics) {
|
||||
if (m.event.entityFilter) {
|
||||
const ef = m.event.entityFilter;
|
||||
if (ef.filtered === 0 && ef.before > 10) {
|
||||
issues.push(`No events filtered by entity - focus entities may be too broad or missing`);
|
||||
issues.push('No events filtered by entity - focus entities may be too broad or missing');
|
||||
}
|
||||
if (ef.before > 0 && ef.filtered > ef.before * 0.8) {
|
||||
issues.push(`Too many events filtered (${ef.filtered}/${ef.before}) - focus may be too narrow`);
|
||||
@@ -355,19 +445,18 @@ export function detectIssues(metrics) {
|
||||
|
||||
// 因果链问题
|
||||
if (m.event.selected > 0 && m.event.causalCount === 0 && m.event.byRecallType.direct === 0) {
|
||||
issues.push('No direct or causal events - query expansion may be inaccurate');
|
||||
issues.push('No direct or causal events - query may not align with stored events');
|
||||
}
|
||||
|
||||
// 锚点匹配问题
|
||||
if ((m.anchor.matched || 0) === 0) {
|
||||
issues.push('No anchors matched - may need to generate anchors');
|
||||
}
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 证据问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
// 证据粗筛问题
|
||||
if (m.evidence.chunkTotal > 0 && m.evidence.chunkAfterCoarse > 0) {
|
||||
const coarseFilterRatio = 1 - (m.evidence.chunkAfterCoarse / m.evidence.chunkTotal);
|
||||
if (coarseFilterRatio > 0.9) {
|
||||
issues.push(`Very high evidence coarse filter ratio (${(coarseFilterRatio * 100).toFixed(0)}%) - query may be too specific`);
|
||||
// Dense 粗筛比例
|
||||
if (m.evidence.chunkTotal > 0 && m.evidence.denseCoarse > 0) {
|
||||
const coarseFilterRatio = 1 - (m.evidence.denseCoarse / m.evidence.chunkTotal);
|
||||
if (coarseFilterRatio > 0.95) {
|
||||
issues.push(`Very high dense coarse filter ratio (${(coarseFilterRatio * 100).toFixed(0)}%) - query vector may be poorly aligned`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -376,7 +465,7 @@ export function detectIssues(metrics) {
|
||||
if (m.evidence.beforeRerank > 0 && m.evidence.afterRerank > 0) {
|
||||
const filterRatio = 1 - (m.evidence.afterRerank / m.evidence.beforeRerank);
|
||||
if (filterRatio > 0.7) {
|
||||
issues.push(`High rerank filter ratio (${(filterRatio * 100).toFixed(0)}%) - many irrelevant chunks removed`);
|
||||
issues.push(`High rerank filter ratio (${(filterRatio * 100).toFixed(0)}%) - many irrelevant chunks in fusion output`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -395,24 +484,36 @@ export function detectIssues(metrics) {
|
||||
}
|
||||
}
|
||||
|
||||
// 证据密度问题
|
||||
// chunk_real 比例(核心质量指标)
|
||||
if (m.evidence.selected > 0 && m.evidence.selectedByType) {
|
||||
const chunkReal = m.evidence.selectedByType.chunkReal || 0;
|
||||
const density = chunkReal / m.evidence.selected;
|
||||
if (density < 0.3 && m.evidence.selected > 10) {
|
||||
issues.push(`Low real chunk ratio in selected (${(density * 100).toFixed(0)}%) - may lack concrete evidence`);
|
||||
const ratio = chunkReal / m.evidence.selected;
|
||||
if (ratio === 0 && m.evidence.selected > 5) {
|
||||
issues.push('Zero real chunks in selected evidence - only anchor virtual chunks present');
|
||||
} else if (ratio < 0.2 && m.evidence.selected > 10) {
|
||||
issues.push(`Low real chunk ratio (${(ratio * 100).toFixed(0)}%) - may lack concrete dialogue evidence`);
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 预算问题
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if (m.budget.utilization > 90) {
|
||||
issues.push(`High budget utilization (${m.budget.utilization}%) - may be truncating content`);
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// 性能问题
|
||||
if (m.timing.total > 5000) {
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if (m.timing.total > 8000) {
|
||||
issues.push(`Slow recall (${m.timing.total}ms) - consider optimization`);
|
||||
}
|
||||
|
||||
if (m.query.buildTime > 100) {
|
||||
issues.push(`Slow query build (${m.query.buildTime}ms) - entity lexicon may be too large`);
|
||||
}
|
||||
|
||||
return issues;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user