improve lexical warmup and standardize stopword pipeline

This commit is contained in:
2026-02-17 14:49:47 +08:00
parent 246eb7a7e2
commit 94eceaed96
14 changed files with 4840 additions and 330 deletions

View File

@@ -0,0 +1,9 @@
// Small domain-level tuning surface.
// Keep this file tiny: add/remove only words that are repeatedly noisy in real logs.
// Extra stopwords layered on top of BASE_STOP_WORDS (defined outside this file).
// Ships empty: add an entry only after a word has proven repeatedly noisy in
// real logs, and remove it again once it stops being a problem.
// NOTE(review): entries presumably need to match the form the stopword filter
// compares against (casing/normalization) — verify against the consumer.
// NOTE(review): this is an untyped empty literal; if the file is compiled as
// TypeScript, consider declaring the element type (string) explicitly.
export const DOMAIN_STOP_WORDS = [];
// High-value words that must never be filtered out as stopwords, i.e. an
// allow-list that is meant to win over the stopword lists.
// Ships empty so the default is safe for plugin-wide deployment; entity names
// do not need to be listed here because they are already protected dynamically.
// NOTE(review): the precedence over stopword lists is enforced by the consumer,
// not by this file — confirm there before relying on it.
export const KEEP_WORDS = [];