improve lexical warmup and standardize stopword pipeline

This commit is contained in:
2026-02-17 14:49:47 +08:00
parent 246eb7a7e2
commit 94eceaed96
14 changed files with 4840 additions and 330 deletions

View File

@@ -52,6 +52,10 @@ export function createMetrics() {
eventHits: 0,
searchTime: 0,
indexReadyTime: 0,
idfEnabled: false,
idfDocCount: 0,
topIdfTerms: [],
termSearches: 0,
eventFilteredByDense: 0,
floorFilteredByDense: 0,
},
@@ -274,6 +278,20 @@ export function formatMetricsLog(metrics) {
if (m.lexical.indexReadyTime > 0) {
lines.push(`├─ index_ready_time: ${m.lexical.indexReadyTime}ms`);
}
lines.push(`├─ idf_enabled: ${!!m.lexical.idfEnabled}`);
if (m.lexical.idfDocCount > 0) {
lines.push(`├─ idf_doc_count: ${m.lexical.idfDocCount}`);
}
if ((m.lexical.topIdfTerms || []).length > 0) {
const topIdfText = m.lexical.topIdfTerms
.slice(0, 5)
.map(x => `${x.term}:${x.idf}`)
.join(', ');
lines.push(`├─ top_idf_terms: [${topIdfText}]`);
}
if (m.lexical.termSearches > 0) {
lines.push(`├─ term_searches: ${m.lexical.termSearches}`);
}
if (m.lexical.eventFilteredByDense > 0) {
lines.push(`├─ event_filtered_by_dense: ${m.lexical.eventFilteredByDense}`);
}