improve lexical warmup and standardize stopword pipeline

This commit is contained in:
2026-02-17 14:49:47 +08:00
parent 246eb7a7e2
commit 94eceaed96
14 changed files with 4840 additions and 330 deletions

View File

@@ -984,6 +984,12 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
: CONFIG.LAST_MESSAGES_K;
const lastMessages = getLastMessages(chat, lastMessagesCount, excludeLastAi);
// Non-blocking preload: keep recall latency stable.
// If not ready yet, query-builder will gracefully fall back to TF terms.
getLexicalIndex().catch((e) => {
xbLog.warn(MODULE_ID, 'Preload lexical index failed; continue with TF fallback', e);
});
const bundle = buildQueryBundle(lastMessages, pendingUserMessage);
const focusTerms = bundle.focusTerms || bundle.focusEntities || [];
const focusCharacters = bundle.focusCharacters || [];
@@ -1161,6 +1167,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
atomIds: [], atomFloors: new Set(),
chunkIds: [], chunkFloors: new Set(),
eventIds: [], chunkScores: [], searchTime: 0,
idfEnabled: false, idfDocCount: 0, topIdfTerms: [], termSearches: 0,
};
let indexReadyTime = 0;
@@ -1184,6 +1191,10 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
metrics.lexical.searchTime = lexicalResult.searchTime || 0;
metrics.lexical.indexReadyTime = indexReadyTime;
metrics.lexical.terms = bundle.lexicalTerms.slice(0, 10);
metrics.lexical.idfEnabled = !!lexicalResult.idfEnabled;
metrics.lexical.idfDocCount = lexicalResult.idfDocCount || 0;
metrics.lexical.topIdfTerms = lexicalResult.topIdfTerms || [];
metrics.lexical.termSearches = lexicalResult.termSearches || 0;
}
// Merge L2 events (events hit by lexical search but missed by dense retrieval)
@@ -1238,7 +1249,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
}
xbLog.info(MODULE_ID,
`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} mergedEvents=+${lexicalEventCount} filteredByDense=${lexicalEventFilteredByDense} floorFiltered=${metrics.lexical.floorFilteredByDense || 0} (indexReady=${indexReadyTime}ms search=${lexicalResult.searchTime || 0}ms total=${lexTime}ms)`
`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} mergedEvents=+${lexicalEventCount} filteredByDense=${lexicalEventFilteredByDense} floorFiltered=${metrics.lexical.floorFilteredByDense || 0} idfEnabled=${lexicalResult.idfEnabled ? 'yes' : 'no'} idfDocs=${lexicalResult.idfDocCount || 0} termSearches=${lexicalResult.termSearches || 0} (indexReady=${indexReadyTime}ms search=${lexicalResult.searchTime || 0}ms total=${lexTime}ms)`
);
// ═══════════════════════════════════════════════════════════════════