From 94eceaed9694aa5a2a143ea2df2d9be001642ec0 Mon Sep 17 00:00:00 2001
From: bielie <bielie@070997.xyz>
Date: Tue, 17 Feb 2026 14:49:47 +0800
Subject: [PATCH] improve lexical warmup and standardize stopword pipeline

---
 modules/story-summary/story-summary.js        |    2 +
 .../story-summary/vector/llm/llm-service.js   |    1 -
 .../vector/retrieval/lexical-index.js         |  563 ++---
 .../story-summary/vector/retrieval/metrics.js |   18 +
 .../vector/retrieval/query-builder.js         |   10 +-
 .../story-summary/vector/retrieval/recall.js  |   13 +-
 .../vector/utils/stopwords-base.js            | 2231 +++++++++++++++++
 .../stopwords-data/LICENSE.stopwords-iso.txt  |   21 +
 .../vector/utils/stopwords-data/SOURCES.md    |   15 +
 .../utils/stopwords-data/stopwords-iso.en.txt | 1298 ++++++++++
 .../utils/stopwords-data/stopwords-iso.ja.txt |  134 +
 .../utils/stopwords-data/stopwords-iso.zh.txt |  794 ++++++
 .../vector/utils/stopwords-patch.js           |    9 +
 .../story-summary/vector/utils/tokenizer.js   |   61 +-
 14 files changed, 4840 insertions(+), 330 deletions(-)
 create mode 100644 modules/story-summary/vector/utils/stopwords-base.js
 create mode 100644 modules/story-summary/vector/utils/stopwords-data/LICENSE.stopwords-iso.txt
 create mode 100644 modules/story-summary/vector/utils/stopwords-data/SOURCES.md
 create mode 100644 modules/story-summary/vector/utils/stopwords-data/stopwords-iso.en.txt
 create mode 100644 modules/story-summary/vector/utils/stopwords-data/stopwords-iso.ja.txt
 create mode 100644 modules/story-summary/vector/utils/stopwords-data/stopwords-iso.zh.txt
 create mode 100644 modules/story-summary/vector/utils/stopwords-patch.js

diff --git a/modules/story-summary/story-summary.js b/modules/story-summary/story-summary.js
index 9cb242f..4f95d0b 100644
--- a/modules/story-summary/story-summary.js
+++ b/modules/story-summary/story-summary.js
@@ -1551,6 +1551,7 @@ async function handleMessageReceived(scheduledChatId) {
 
     // Refresh entity lexicon after new message (new roles may appear)
     refreshEntityLexiconAndWarmup();
+    scheduleLexicalWarmup(100);
 
     // Auto backfill missing L0 (delay to avoid contention with current floor)
     setTimeout(() => maybeAutoExtractL0(), 2000);
@@ -1559,6 +1560,7 @@ async function handleMessageReceived(scheduledChatId) {
 function handleMessageSent(scheduledChatId) {
     if (isChatStale(scheduledChatId)) return;
     initButtonsForAll();
+    scheduleLexicalWarmup(0);
     setTimeout(() => maybeAutoRunSummary("before_user"), 1000);
 }
 
diff --git a/modules/story-summary/vector/llm/llm-service.js b/modules/story-summary/vector/llm/llm-service.js
index 13ec391..7120b64 100644
--- a/modules/story-summary/vector/llm/llm-service.js
+++ b/modules/story-summary/vector/llm/llm-service.js
@@ -2,7 +2,6 @@
 // vector/llm/llm-service.js - 修复 prefill 传递方式
 // ═══════════════════════════════════════════════════════════════════════════
 import { xbLog } from '../../../../core/debug-core.js';
-import { getVectorConfig } from '../../data/config.js';
 import { getApiKey } from './siliconflow.js';
 
 const MODULE_ID = 'vector-llm-service';
diff --git a/modules/story-summary/vector/retrieval/lexical-index.js b/modules/story-summary/vector/retrieval/lexical-index.js
index 83124d6..f464af7 100644
--- a/modules/story-summary/vector/retrieval/lexical-index.js
+++ b/modules/story-summary/vector/retrieval/lexical-index.js
@@ -1,16 +1,3 @@
-// ═══════════════════════════════════════════════════════════════════════════
-// lexical-index.js - MiniSearch 词法检索索引
-//
-// 职责：
-// 1. 对 L0 atoms + L1 chunks + L2 events 建立词法索引
-// 2. 提供词法检索接口（专名精确匹配兜底）
-// 3. 惰性构建 + 异步预热 + 缓存失效机制
-//
-// 索引存储：纯内存（不持久化）
-// 分词器：统一使用 tokenizer.js（结巴 + 实体保护 + 降级）
-// 重建时机：CHAT_CHANGED / L0提取完成 / L2总结完成
-// ═══════════════════════════════════════════════════════════════════════════
-
 import MiniSearch from '../../../../libs/minisearch.mjs';
 import { getContext } from '../../../../../../../extensions.js';
 import { getSummaryStore } from '../../data/store.js';
@@ -20,76 +7,166 @@ import { tokenizeForIndex } from '../utils/tokenizer.js';
 
 const MODULE_ID = 'lexical-index';
 
-// ─────────────────────────────────────────────────────────────────────────
-// 缓存
-// ─────────────────────────────────────────────────────────────────────────
-
-/** @type {MiniSearch|null} */
+// In-memory index cache
 let cachedIndex = null;
-
-/** @type {string|null} */
 let cachedChatId = null;
-
-/** @type {string|null} 数据指纹（atoms + chunks + events 数量） */
 let cachedFingerprint = null;
-
-/** @type {boolean} 是否正在构建 */
 let building = false;
-
-/** @type {Promise<MiniSearch|null>|null} 当前构建 Promise（防重入） */
 let buildPromise = null;
-/** @type {Map<number, string[]>} floor → 该楼层的 doc IDs（仅 L1 chunks） */
+
+// floor -> chunk doc ids (L1 only)
 let floorDocIds = new Map();
 
-// ─────────────────────────────────────────────────────────────────────────
-// 工具函数
-// ─────────────────────────────────────────────────────────────────────────
+// IDF stats over lexical docs (L1 chunks + L2 events)
+let termDfMap = new Map();
+let docTokenSets = new Map(); // docId -> Set<token>
+let lexicalDocCount = 0;
+
+const IDF_MIN = 1.0;
+const IDF_MAX = 4.0;
+const BUILD_BATCH_SIZE = 500;
 
-/**
- * 清理事件摘要（移除楼层标记）
- * @param {string} summary
- * @returns {string}
- */
 function cleanSummary(summary) {
     return String(summary || '')
         .replace(/\s*\(#\d+(?:-\d+)?\)\s*$/, '')
         .trim();
 }
 
-/**
- * 计算缓存指纹
- * @param {number} chunkCount
- * @param {number} eventCount
- * @returns {string}
- */
-function computeFingerprint(chunkCount, eventCount) {
-    return `${chunkCount}:${eventCount}`;
+// 32-bit FNV-1a style hash over the UTF-16 code units of `input`, continuing from `seed`.
+function fnv1a32(input, seed = 0x811C9DC5) {
+    const codeUnits = String(input || '');
+    let acc = seed >>> 0;
+    for (let pos = 0; pos < codeUnits.length; pos += 1) {
+        acc = Math.imul(acc ^ codeUnits.charCodeAt(pos), 0x01000193) >>> 0;
+    }
+    return acc;
+}
+
+// Comparator ordering docs by their "type:id" key using plain string comparison.
+function compareDocKeys(a, b) {
+    const keyOf = (doc) => `${doc?.type || ''}:${doc?.id || ''}`;
+    const left = keyOf(a);
+    const right = keyOf(b);
+    return left === right ? 0 : (left < right ? -1 : 1);
+}
+
+// Cache fingerprint "<doc count>:<hex hash>"; docs are sorted by type:id before hashing type, id, floor and text.
+function computeFingerprintFromDocs(docs) {
+    const ordered = Array.isArray(docs) ? [...docs].sort(compareDocKeys) : [];
+    // Unit (\u001F) and record (\u001E) separators keep adjacent fields and docs from running together.
+    const serialize = (doc) =>
+        `${doc?.type || ''}\u001F${doc?.id || ''}\u001F${doc?.floor ?? ''}\u001F${doc?.text || ''}\u001E`;
+    const digest = ordered.reduce((acc, doc) => fnv1a32(serialize(doc), acc), 0x811C9DC5);
+
+    const hex = (digest >>> 0).toString(16);
+    return `${ordered.length}:${hex}`;
 }
 
-/**
- * 让出主线程（避免长时间阻塞 UI）
- * @returns {Promise<void>}
- */
 function yieldToMain() {
     return new Promise(resolve => setTimeout(resolve, 0));
 }
 
-// ─────────────────────────────────────────────────────────────────────────
-// 文档收集
-// ─────────────────────────────────────────────────────────────────────────
+function clamp(v, min, max) { // bound v to [min, max]
+    return Math.max(Math.min(v, max), min);
+}
+
+function normalizeTerm(term) { // canonical term form: stringified, trimmed, lower-cased
+    return (term ? String(term) : '').trim().toLowerCase(); // any falsy input becomes ''
+}
+
+function computeIdfFromDf(df, docCount) { // smoothed IDF: ln((N + 1) / (df + 1)) + 1, clamped to [IDF_MIN, IDF_MAX]
+    if (!docCount || docCount <= 0) return 1; // no documents: neutral weight
+    const smoothedRatio = (docCount + 1) / ((df || 0) + 1);
+    return clamp(Math.log(smoothedRatio) + 1, IDF_MIN, IDF_MAX);
+}
+
+function computeIdf(term) { // IDF of a term from the module-level DF table; 1 for empty terms or when no docs are counted
+    const key = normalizeTerm(term);
+    const usable = key !== '' && lexicalDocCount > 0;
+    return usable ? computeIdfFromDf(termDfMap.get(key) || 0, lexicalDocCount) : 1;
+}
+
+function extractUniqueTokens(text) { // distinct normalized, non-empty index tokens of `text`
+    return new Set(tokenizeForIndex(String(text || '')).map((tok) => normalizeTerm(tok)).filter((tok) => tok !== ''));
+}
+
+function clearIdfState() { // drop all IDF bookkeeping; fresh Maps are assigned rather than cleared in place
+    lexicalDocCount = 0;
+    docTokenSets = new Map();
+    termDfMap = new Map();
+}
+
+/**
+ * Undo one document's contribution to the IDF statistics.
+ * No-op when the id is empty or the document was never registered.
+ */
+function removeDocumentIdf(docId) {
+    const key = String(docId || '');
+    const seenTerms = key ? docTokenSets.get(key) : undefined;
+    if (!seenTerms) return;
+
+    seenTerms.forEach((term) => {
+        const remaining = (termDfMap.get(term) || 0) - 1;
+        // Terms whose document frequency reaches zero are dropped from the table.
+        if (remaining > 0) termDfMap.set(term, remaining);
+        else termDfMap.delete(term);
+    });
+
+    docTokenSets.delete(key);
+    lexicalDocCount = Math.max(0, lexicalDocCount - 1);
+}
+
+/**
+ * Register (or re-register) a document's unique tokens in the IDF statistics.
+ * Any entry already stored under the same id is removed first, so re-adding never double-counts.
+ */
+function addDocumentIdf(docId, text) {
+    const key = String(docId || '');
+    if (key === '') return;
+
+    removeDocumentIdf(key);
+    const terms = extractUniqueTokens(text);
+    terms.forEach((term) => termDfMap.set(term, (termDfMap.get(term) || 0) + 1));
+
+    docTokenSets.set(key, terms);
+    lexicalDocCount += 1;
+}
+
+// Recompute all IDF statistics from scratch; docs lacking an id or non-blank text are ignored.
+function rebuildIdfFromDocs(docs) {
+    clearIdfState();
+    for (const entry of docs || []) {
+        const key = String(entry?.id || '');
+        const body = String(entry?.text || '');
+        if (key && body.trim()) addDocumentIdf(key, body);
+    }
+}
+
+/**
+ * Build the lexical-index document for an L2 event.
+ * The text is title, participants and cleaned summary joined by spaces.
+ *
+ * @param {object} ev - event; reads id, title, participants and summary
+ * @returns {object|null} `{ id, type: 'event', floor: null, text }`, or null without an id or text
+ */
+function buildEventDoc(ev) {
+    if (!ev?.id) return null;
+
+    const summary = cleanSummary(ev.summary);
+    const text = [
+        ...(ev.title ? [ev.title] : []),
+        ...(ev.participants?.length ? [ev.participants.join(' ')] : []),
+        ...(summary ? [summary] : []),
+    ].join(' ').trim();
+
+    // An event that yields no text is not indexed at all.
+    return text ? { id: ev.id, type: 'event', floor: null, text } : null;
+}
 
-/**
- * 收集所有待索引文档
- *
- * @param {object[]} chunks - getAllChunks(chatId) 返回值
- * @param {object[]} events - store.json.events
- * @returns {object[]} 文档数组
- */
 function collectDocuments(chunks, events) {
     const docs = [];
 
-    // L1 chunks + 填充 floorDocIds
-    for (const chunk of (chunks || [])) {
+    for (const chunk of chunks || []) {
         if (!chunk?.chunkId || !chunk.text) continue;
 
         const floor = chunk.floor ?? -1;
@@ -101,48 +178,19 @@ function collectDocuments(chunks, events) {
         });
 
         if (floor >= 0) {
-            if (!floorDocIds.has(floor)) {
-                floorDocIds.set(floor, []);
-            }
+            if (!floorDocIds.has(floor)) floorDocIds.set(floor, []);
             floorDocIds.get(floor).push(chunk.chunkId);
         }
     }
 
-    // L2 events
-    for (const ev of (events || [])) {
-        if (!ev?.id) continue;
-        const parts = [];
-        if (ev.title) parts.push(ev.title);
-        if (ev.participants?.length) parts.push(ev.participants.join(' '));
-        const summary = cleanSummary(ev.summary);
-        if (summary) parts.push(summary);
-        const text = parts.join(' ').trim();
-        if (!text) continue;
-
-        docs.push({
-            id: ev.id,
-            type: 'event',
-            floor: null,
-            text,
-        });
+    for (const ev of events || []) {
+        const doc = buildEventDoc(ev);
+        if (doc) docs.push(doc);
     }
 
     return docs;
 }
 
-// ─────────────────────────────────────────────────────────────────────────
-// 索引构建（分片，不阻塞主线程）
-// ─────────────────────────────────────────────────────────────────────────
-
-/** 每批添加的文档数 */
-const BUILD_BATCH_SIZE = 500;
-
-/**
- * 构建 MiniSearch 索引（分片异步）
- *
- * @param {object[]} docs - 文档数组
- * @returns {Promise<MiniSearch>}
- */
 async function buildIndexAsync(docs) {
     const T0 = performance.now();
 
@@ -158,49 +206,43 @@ async function buildIndexAsync(docs) {
         tokenize: tokenizeForIndex,
     });
 
-    if (!docs.length) {
-        return index;
-    }
+    if (!docs.length) return index;
 
-    // 分片添加，每批 BUILD_BATCH_SIZE 条后让出主线程
     for (let i = 0; i < docs.length; i += BUILD_BATCH_SIZE) {
         const batch = docs.slice(i, i + BUILD_BATCH_SIZE);
         index.addAll(batch);
 
-        // 非最后一批时让出主线程
         if (i + BUILD_BATCH_SIZE < docs.length) {
             await yieldToMain();
         }
     }
 
     const elapsed = Math.round(performance.now() - T0);
-    xbLog.info(MODULE_ID,
-        `索引构建完成: ${docs.length} 文档 (${elapsed}ms)`
-    );
-
+    xbLog.info(MODULE_ID, `Index built: ${docs.length} docs (${elapsed}ms)`);
     return index;
 }
 
-// ─────────────────────────────────────────────────────────────────────────
-// 检索
-// ─────────────────────────────────────────────────────────────────────────
-
 /**
  * @typedef {object} LexicalSearchResult
- * @property {string[]} atomIds    - 命中的 L0 atom IDs
- * @property {Set<number>} atomFloors - 命中的 L0 楼层集合
- * @property {string[]} chunkIds   - 命中的 L1 chunk IDs
- * @property {Set<number>} chunkFloors - 命中的 L1 楼层集合
- * @property {string[]} eventIds   - 命中的 L2 event IDs
- * @property {object[]} chunkScores - chunk 命中详情 [{ chunkId, score }]
- * @property {number}   searchTime - 检索耗时 ms
+ * @property {string[]} atomIds - Reserved for backward compatibility (currently empty).
+ * @property {Set<number>} atomFloors - Reserved for backward compatibility (currently empty).
+ * @property {string[]} chunkIds - Matched L1 chunk ids sorted by weighted lexical score.
+ * @property {Set<number>} chunkFloors - Floor ids covered by matched chunks.
+ * @property {string[]} eventIds - Matched L2 event ids sorted by weighted lexical score.
+ * @property {object[]} chunkScores - Weighted lexical scores for matched chunks.
+ * @property {boolean} idfEnabled - Whether IDF stats are available for weighting.
+ * @property {number} idfDocCount - Number of lexical docs used to compute IDF.
+ * @property {Array<{term:string,idf:number}>} topIdfTerms - Top query terms by IDF.
+ * @property {number} termSearches - Number of per-term MiniSearch queries executed.
+ * @property {number} searchTime - Total lexical search time in milliseconds.
  */
 
 /**
- * 在词法索引中检索
+ * Search lexical index by terms, using per-term MiniSearch and IDF-weighted score aggregation.
+ * This keeps existing outputs compatible while adding observability fields.
  *
- * @param {MiniSearch} index - 索引实例
- * @param {string[]} terms - 查询词列表
+ * @param {MiniSearch} index
+ * @param {string[]} terms
  * @returns {LexicalSearchResult}
  */
 export function searchLexicalIndex(index, terms) {
@@ -213,6 +255,10 @@ export function searchLexicalIndex(index, terms) {
         chunkFloors: new Set(),
         eventIds: [],
         chunkScores: [],
+        idfEnabled: lexicalDocCount > 0,
+        idfDocCount: lexicalDocCount,
+        topIdfTerms: [],
+        termSearches: 0,
         searchTime: 0,
     };
 
@@ -221,79 +267,84 @@ export function searchLexicalIndex(index, terms) {
         return result;
     }
 
-    // 用所有 terms 联合查询
-    const queryString = terms.join(' ');
+    const queryTerms = Array.from(new Set((terms || []).map(normalizeTerm).filter(Boolean)));
+    const weightedScores = new Map(); // docId -> score
+    const hitMeta = new Map(); // docId -> { type, floor }
+    const idfPairs = [];
 
-    let hits;
-    try {
-        hits = index.search(queryString, {
-            boost: { text: 1 },
-            fuzzy: 0.2,
-            prefix: true,
-            combineWith: 'OR',
-            // 使用与索引相同的分词器
-            tokenize: tokenizeForIndex,
-        });
-    } catch (e) {
-        xbLog.warn(MODULE_ID, '检索失败', e);
-        result.searchTime = Math.round(performance.now() - T0);
-        return result;
+    for (const term of queryTerms) {
+        const idf = computeIdf(term);
+        idfPairs.push({ term, idf });
+
+        let hits = [];
+        try {
+            hits = index.search(term, {
+                boost: { text: 1 },
+                fuzzy: 0.2,
+                prefix: true,
+                combineWith: 'OR',
+                tokenize: tokenizeForIndex,
+            });
+        } catch (e) {
+            xbLog.warn(MODULE_ID, `Lexical term search failed: ${term}`, e);
+            continue;
+        }
+
+        result.termSearches += 1;
+
+        for (const hit of hits) {
+            const id = String(hit.id || '');
+            if (!id) continue;
+
+            const weighted = (hit.score || 0) * idf;
+            weightedScores.set(id, (weightedScores.get(id) || 0) + weighted);
+
+            if (!hitMeta.has(id)) {
+                hitMeta.set(id, {
+                    type: hit.type,
+                    floor: hit.floor,
+                });
+            }
+        }
     }
 
-    // 分类结果
-    const chunkIdSet = new Set();
-    const eventIdSet = new Set();
+    idfPairs.sort((a, b) => b.idf - a.idf);
+    result.topIdfTerms = idfPairs.slice(0, 5);
 
-    for (const hit of hits) {
-        const type = hit.type;
-        const id = hit.id;
-        const floor = hit.floor;
+    const sortedHits = Array.from(weightedScores.entries())
+        .sort((a, b) => b[1] - a[1]);
 
-        switch (type) {
-            case 'chunk':
-                if (!chunkIdSet.has(id)) {
-                    chunkIdSet.add(id);
-                    result.chunkIds.push(id);
-                    result.chunkScores.push({ chunkId: id, score: hit.score });
-                    if (typeof floor === 'number' && floor >= 0) {
-                        result.chunkFloors.add(floor);
-                    }
-                }
-                break;
+    for (const [id, score] of sortedHits) {
+        const meta = hitMeta.get(id);
+        if (!meta) continue;
 
-            case 'event':
-                if (!eventIdSet.has(id)) {
-                    eventIdSet.add(id);
-                    result.eventIds.push(id);
-                }
-                break;
+        if (meta.type === 'chunk') {
+            result.chunkIds.push(id);
+            result.chunkScores.push({ chunkId: id, score });
+            if (typeof meta.floor === 'number' && meta.floor >= 0) {
+                result.chunkFloors.add(meta.floor);
+            }
+            continue;
+        }
+
+        if (meta.type === 'event') {
+            result.eventIds.push(id);
         }
     }
 
     result.searchTime = Math.round(performance.now() - T0);
 
-    xbLog.info(MODULE_ID,
-        `检索完成: terms=[${terms.slice(0, 5).join(',')}] → atoms=${result.atomIds.length} chunks=${result.chunkIds.length} events=${result.eventIds.length} (${result.searchTime}ms)`
+    xbLog.info(
+        MODULE_ID,
+        `Lexical search terms=[${queryTerms.slice(0, 5).join(',')}] chunks=${result.chunkIds.length} events=${result.eventIds.length} termSearches=${result.termSearches} (${result.searchTime}ms)`,
     );
 
     return result;
 }
 
-// ─────────────────────────────────────────────────────────────────────────
-// 内部构建流程（收集数据 + 构建索引）
-// ─────────────────────────────────────────────────────────────────────────
-
-/**
- * 收集数据并构建索引
- *
- * @param {string} chatId
- * @returns {Promise<{index: MiniSearch, fingerprint: string}>}
- */
 async function collectAndBuild(chatId) {
-    // 清空侧索引（全量重建）
     floorDocIds = new Map();
 
-    // 收集数据（不含 L0 atoms）
     const store = getSummaryStore();
     const events = store?.json?.events || [];
 
@@ -301,48 +352,44 @@ async function collectAndBuild(chatId) {
     try {
         chunks = await getAllChunks(chatId);
     } catch (e) {
-        xbLog.warn(MODULE_ID, '获取 chunks 失败', e);
+        xbLog.warn(MODULE_ID, 'Failed to load chunks', e);
     }
 
-    const fp = computeFingerprint(chunks.length, events.length);
+    const docs = collectDocuments(chunks, events);
+    const fp = computeFingerprintFromDocs(docs);
 
-    // 检查是否在收集过程中缓存已被其他调用更新
     if (cachedIndex && cachedChatId === chatId && cachedFingerprint === fp) {
         return { index: cachedIndex, fingerprint: fp };
     }
 
-    // 收集文档（同时填充 floorDocIds）
-    const docs = collectDocuments(chunks, events);
-
-    // 异步分片构建
+    rebuildIdfFromDocs(docs);
     const index = await buildIndexAsync(docs);
 
     return { index, fingerprint: fp };
 }
 
-// ─────────────────────────────────────────────────────────────────────────
-// 公开接口：getLexicalIndex（惰性获取）
-// ─────────────────────────────────────────────────────────────────────────
-
 /**
- * 获取词法索引（惰性构建 + 缓存）
- *
- * 如果缓存有效则直接返回；否则自动构建。
- * 如果正在构建中，等待构建完成。
- *
- * @returns {Promise<MiniSearch|null>}
+ * Expose IDF accessor for query-term selection in query-builder.
+ * If index stats are not ready, this gracefully falls back to idf=1.
  */
+export function getLexicalIdfAccessor() {
+    // enabled/docCount are a snapshot taken at call time,
+    // while getIdf() always consults the current statistics.
+    const docCount = lexicalDocCount;
+    const enabled = docCount > 0;
+    const getIdf = (term) => computeIdf(term);
+
+    return { enabled, docCount, getIdf };
+}
+
 export async function getLexicalIndex() {
     const { chatId } = getContext();
     if (!chatId) return null;
 
-    // 快速路径：如果缓存存在且 chatId 未变，则直接命中
-    // 指纹校验放到构建流程中完成，避免为指纹而额外读一次 IndexedDB
     if (cachedIndex && cachedChatId === chatId && cachedFingerprint) {
         return cachedIndex;
     }
 
-    // 正在构建中，等待结果
     if (building && buildPromise) {
         try {
             await buildPromise;
@@ -350,27 +397,23 @@ export async function getLexicalIndex() {
                 return cachedIndex;
             }
         } catch {
-            // 构建失败，继续往下重建
+            // Continue to rebuild below.
         }
     }
 
-    // 需要重建（指纹将在 collectAndBuild 内部计算并写入缓存）
-    xbLog.info(MODULE_ID, `缓存失效，重建索引 (chatId=${chatId.slice(0, 8)})`);
+    xbLog.info(MODULE_ID, `Lexical cache miss; rebuilding (chatId=${chatId.slice(0, 8)})`);
 
     building = true;
     buildPromise = collectAndBuild(chatId);
 
     try {
         const { index, fingerprint } = await buildPromise;
-
-        // 原子替换缓存
         cachedIndex = index;
         cachedChatId = chatId;
         cachedFingerprint = fingerprint;
-
         return index;
     } catch (e) {
-        xbLog.error(MODULE_ID, '索引构建失败', e);
+        xbLog.error(MODULE_ID, 'Index build failed', e);
         return null;
     } finally {
         building = false;
@@ -378,74 +421,29 @@ export async function getLexicalIndex() {
     }
 }
 
-// ─────────────────────────────────────────────────────────────────────────
-// 公开接口：warmupIndex（异步预建）
-// ─────────────────────────────────────────────────────────────────────────
-
-/**
- * 异步预建索引
- *
- * 在 CHAT_CHANGED 时调用，后台构建索引。
- * 不阻塞调用方，不返回结果。
- * 构建完成后缓存自动更新，后续 getLexicalIndex() 直接命中。
- *
- * 调用时机：
- * - handleChatChanged（实体注入后）
- * - L0 提取完成
- * - L2 总结完成
- */
 export function warmupIndex() {
     const { chatId } = getContext();
-    if (!chatId) return;
+    if (!chatId || building) return;
 
-    // 已在构建中，不重复触发
-    if (building) return;
-
-    // fire-and-forget
     getLexicalIndex().catch(e => {
-        xbLog.warn(MODULE_ID, '预热索引失败', e);
+        xbLog.warn(MODULE_ID, 'Warmup failed', e);
     });
 }
 
-// ─────────────────────────────────────────────────────────────────────────
-// 公开接口：invalidateLexicalIndex（缓存失效）
-// ─────────────────────────────────────────────────────────────────────────
-
-/**
- * 使缓存失效（下次 getLexicalIndex / warmupIndex 时自动重建）
- *
- * 调用时机：
- * - CHAT_CHANGED
- * - L0 提取完成
- * - L2 总结完成
- */
 export function invalidateLexicalIndex() {
     if (cachedIndex) {
-        xbLog.info(MODULE_ID, '索引缓存已失效');
+        xbLog.info(MODULE_ID, 'Lexical index cache invalidated');
     }
     cachedIndex = null;
     cachedChatId = null;
     cachedFingerprint = null;
     floorDocIds = new Map();
+    clearIdfState();
 }
 
-// ─────────────────────────────────────────────────────────────────────────
-// 增量更新接口
-// ─────────────────────────────────────────────────────────────────────────
-
-/**
- * 为指定楼层添加 L1 chunks 到索引
- *
- * 先移除该楼层旧文档，再添加新文档。
- * 如果索引不存在（缓存失效），静默跳过（下次 getLexicalIndex 全量重建）。
- *
- * @param {number} floor - 楼层号
- * @param {object[]} chunks - chunk 对象列表（需有 chunkId、text、floor）
- */
 export function addDocumentsForFloor(floor, chunks) {
     if (!cachedIndex || !chunks?.length) return;
 
-    // 先移除旧文档
     removeDocumentsByFloor(floor);
 
     const docs = [];
@@ -453,30 +451,29 @@ export function addDocumentsForFloor(floor, chunks) {
 
     for (const chunk of chunks) {
         if (!chunk?.chunkId || !chunk.text) continue;
-        docs.push({
+
+        const doc = {
             id: chunk.chunkId,
             type: 'chunk',
             floor: chunk.floor ?? floor,
             text: chunk.text,
-        });
+        };
+        docs.push(doc);
         docIds.push(chunk.chunkId);
     }
 
-    if (docs.length > 0) {
-        cachedIndex.addAll(docs);
-        floorDocIds.set(floor, docIds);
-        xbLog.info(MODULE_ID, `增量添加: floor ${floor}, ${docs.length} 个 chunk`);
+    if (!docs.length) return;
+
+    cachedIndex.addAll(docs);
+    floorDocIds.set(floor, docIds);
+
+    for (const doc of docs) {
+        addDocumentIdf(doc.id, doc.text);
     }
+
+    xbLog.info(MODULE_ID, `Incremental add floor=${floor} chunks=${docs.length}`);
 }
 
-/**
- * 从索引中移除指定楼层的所有 L1 chunk 文档
- *
- * 使用 MiniSearch discard()（软删除）。
- * 如果索引不存在，静默跳过。
- *
- * @param {number} floor - 楼层号
- */
 export function removeDocumentsByFloor(floor) {
     if (!cachedIndex) return;
 
@@ -487,55 +484,39 @@ export function removeDocumentsByFloor(floor) {
         try {
             cachedIndex.discard(id);
         } catch {
-            // 文档可能不存在（已被全量重建替换）
+            // Ignore if the doc was already removed/rebuilt.
         }
+        removeDocumentIdf(id);
     }
 
     floorDocIds.delete(floor);
-    xbLog.info(MODULE_ID, `增量移除: floor ${floor}, ${docIds.length} 个文档`);
+    xbLog.info(MODULE_ID, `Incremental remove floor=${floor} chunks=${docIds.length}`);
 }
 
-/**
- * 将新 L2 事件添加到索引
- *
- * 如果事件 ID 已存在，先 discard 再 add（覆盖）。
- * 如果索引不存在，静默跳过。
- *
- * @param {object[]} events - 事件对象列表（需有 id、title、summary 等）
- */
 export function addEventDocuments(events) {
     if (!cachedIndex || !events?.length) return;
 
     const docs = [];
 
     for (const ev of events) {
-        if (!ev?.id) continue;
+        const doc = buildEventDoc(ev);
+        if (!doc) continue;
 
-        const parts = [];
-        if (ev.title) parts.push(ev.title);
-        if (ev.participants?.length) parts.push(ev.participants.join(' '));
-        const summary = cleanSummary(ev.summary);
-        if (summary) parts.push(summary);
-        const text = parts.join(' ').trim();
-        if (!text) continue;
-
-        // 覆盖：先尝试移除旧的
         try {
-            cachedIndex.discard(ev.id);
+            cachedIndex.discard(doc.id);
         } catch {
-            // 不存在则忽略
+            // Ignore if previous document does not exist.
         }
-
-        docs.push({
-            id: ev.id,
-            type: 'event',
-            floor: null,
-            text,
-        });
+        removeDocumentIdf(doc.id);
+        docs.push(doc);
     }
 
-    if (docs.length > 0) {
-        cachedIndex.addAll(docs);
-        xbLog.info(MODULE_ID, `增量添加: ${docs.length} 个事件`);
+    if (!docs.length) return;
+
+    cachedIndex.addAll(docs);
+    for (const doc of docs) {
+        addDocumentIdf(doc.id, doc.text);
     }
+
+    xbLog.info(MODULE_ID, `Incremental add events=${docs.length}`);
 }
diff --git a/modules/story-summary/vector/retrieval/metrics.js b/modules/story-summary/vector/retrieval/metrics.js
index 4530788..ecd06b4 100644
--- a/modules/story-summary/vector/retrieval/metrics.js
+++ b/modules/story-summary/vector/retrieval/metrics.js
@@ -52,6 +52,10 @@ export function createMetrics() {
             eventHits: 0,
             searchTime: 0,
             indexReadyTime: 0,
+            idfEnabled: false,
+            idfDocCount: 0,
+            topIdfTerms: [],
+            termSearches: 0,
             eventFilteredByDense: 0,
             floorFilteredByDense: 0,
         },
@@ -274,6 +278,20 @@ export function formatMetricsLog(metrics) {
     if (m.lexical.indexReadyTime > 0) {
         lines.push(`├─ index_ready_time: ${m.lexical.indexReadyTime}ms`);
     }
+    lines.push(`├─ idf_enabled: ${!!m.lexical.idfEnabled}`);
+    if (m.lexical.idfDocCount > 0) {
+        lines.push(`├─ idf_doc_count: ${m.lexical.idfDocCount}`);
+    }
+    if ((m.lexical.topIdfTerms || []).length > 0) {
+        const topIdfText = m.lexical.topIdfTerms
+            .slice(0, 5)
+            .map(x => `${x.term}:${x.idf}`)
+            .join(', ');
+        lines.push(`├─ top_idf_terms: [${topIdfText}]`);
+    }
+    if (m.lexical.termSearches > 0) {
+        lines.push(`├─ term_searches: ${m.lexical.termSearches}`);
+    }
     if (m.lexical.eventFilteredByDense > 0) {
         lines.push(`├─ event_filtered_by_dense: ${m.lexical.eventFilteredByDense}`);
     }
diff --git a/modules/story-summary/vector/retrieval/query-builder.js b/modules/story-summary/vector/retrieval/query-builder.js
index c5593a0..714a0a9 100644
--- a/modules/story-summary/vector/retrieval/query-builder.js
+++ b/modules/story-summary/vector/retrieval/query-builder.js
@@ -20,6 +20,7 @@
 
 import { getContext } from '../../../../../../../extensions.js';
 import { buildEntityLexicon, buildDisplayNameMap, extractEntitiesFromText, buildCharacterPools } from './entity-lexicon.js';
+import { getLexicalIdfAccessor } from './lexical-index.js';
 import { getSummaryStore } from '../../data/store.js';
 import { filterText } from '../utils/text-filter.js';
 import { tokenizeForIndex as tokenizerTokenizeForIndex } from '../utils/tokenizer.js';
@@ -106,6 +107,7 @@ export function computeLengthFactor(charCount) {
 function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
     if (!text) return [];
 
+    const idfAccessor = getLexicalIdfAccessor();
     const tokens = tokenizerTokenizeForIndex(text);
     const freq = new Map();
     for (const token of tokens) {
@@ -115,9 +117,13 @@ function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
     }
 
     return Array.from(freq.entries())
-        .sort((a, b) => b[1] - a[1])
+        .map(([term, tf]) => {
+            const idf = idfAccessor.enabled ? idfAccessor.getIdf(term) : 1;
+            return { term, tf, score: tf * idf };
+        })
+        .sort((a, b) => (b.score - a.score) || (b.tf - a.tf))
         .slice(0, maxTerms)
-        .map(([term]) => term);
+        .map(x => x.term);
 }
 
 // ─────────────────────────────────────────────────────────────────────────
diff --git a/modules/story-summary/vector/retrieval/recall.js b/modules/story-summary/vector/retrieval/recall.js
index b049e32..774f643 100644
--- a/modules/story-summary/vector/retrieval/recall.js
+++ b/modules/story-summary/vector/retrieval/recall.js
@@ -984,6 +984,12 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
         : CONFIG.LAST_MESSAGES_K;
     const lastMessages = getLastMessages(chat, lastMessagesCount, excludeLastAi);
 
+    // Non-blocking preload: keep recall latency stable.
+    // If not ready yet, query-builder will gracefully fall back to TF terms.
+    getLexicalIndex().catch((e) => {
+        xbLog.warn(MODULE_ID, 'Preload lexical index failed; continue with TF fallback', e);
+    });
+
     const bundle = buildQueryBundle(lastMessages, pendingUserMessage);
     const focusTerms = bundle.focusTerms || bundle.focusEntities || [];
     const focusCharacters = bundle.focusCharacters || [];
@@ -1161,6 +1167,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
         atomIds: [], atomFloors: new Set(),
         chunkIds: [], chunkFloors: new Set(),
         eventIds: [], chunkScores: [], searchTime: 0,
+        idfEnabled: false, idfDocCount: 0, topIdfTerms: [], termSearches: 0,
     };
 
     let indexReadyTime = 0;
@@ -1184,6 +1191,10 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
         metrics.lexical.searchTime = lexicalResult.searchTime || 0;
         metrics.lexical.indexReadyTime = indexReadyTime;
         metrics.lexical.terms = bundle.lexicalTerms.slice(0, 10);
+        metrics.lexical.idfEnabled = !!lexicalResult.idfEnabled;
+        metrics.lexical.idfDocCount = lexicalResult.idfDocCount || 0;
+        metrics.lexical.topIdfTerms = lexicalResult.topIdfTerms || [];
+        metrics.lexical.termSearches = lexicalResult.termSearches || 0;
     }
 
     // 合并 L2 events（lexical 命中但 dense 未命中的 events）
@@ -1238,7 +1249,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
     }
 
     xbLog.info(MODULE_ID,
-        `Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} mergedEvents=+${lexicalEventCount} filteredByDense=${lexicalEventFilteredByDense} floorFiltered=${metrics.lexical.floorFilteredByDense || 0} (indexReady=${indexReadyTime}ms search=${lexicalResult.searchTime || 0}ms total=${lexTime}ms)`
+        `Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length} mergedEvents=+${lexicalEventCount} filteredByDense=${lexicalEventFilteredByDense} floorFiltered=${metrics.lexical.floorFilteredByDense || 0} idfEnabled=${lexicalResult.idfEnabled ? 'yes' : 'no'} idfDocs=${lexicalResult.idfDocCount || 0} termSearches=${lexicalResult.termSearches || 0} (indexReady=${indexReadyTime}ms search=${lexicalResult.searchTime || 0}ms total=${lexTime}ms)`
     );
 
     // ═══════════════════════════════════════════════════════════════════
diff --git a/modules/story-summary/vector/utils/stopwords-base.js b/modules/story-summary/vector/utils/stopwords-base.js
new file mode 100644
index 0000000..2ce6fa0
--- /dev/null
+++ b/modules/story-summary/vector/utils/stopwords-base.js
@@ -0,0 +1,2231 @@
+// Auto-generated stopword baseline for story-summary.
+// Source: stopwords-iso (MIT), snapshot files under ./stopwords-data
+// Languages merged: zh + ja + en
+// Do not edit manually. Update snapshot files then regenerate.
+
+export const BASE_STOP_WORDS = [
+    "、",
+    "。",
+    "〈",
+    "〉",
+    "《",
+    "》",
+    "一",
+    "一个",
+    "一些",
+    "一何",
+    "一切",
+    "一则",
+    "一方面",
+    "一旦",
+    "一来",
+    "一样",
+    "一种",
+    "一般",
+    "一转眼",
+    "七",
+    "万一",
+    "三",
+    "上",
+    "上下",
+    "下",
+    "不",
+    "不仅",
+    "不但",
+    "不光",
+    "不单",
+    "不只",
+    "不外乎",
+    "不如",
+    "不妨",
+    "不尽",
+    "不尽然",
+    "不得",
+    "不怕",
+    "不惟",
+    "不成",
+    "不拘",
+    "不料",
+    "不是",
+    "不比",
+    "不然",
+    "不特",
+    "不独",
+    "不管",
+    "不至于",
+    "不若",
+    "不论",
+    "不过",
+    "不问",
+    "与",
+    "与其",
+    "与其说",
+    "与否",
+    "与此同时",
+    "且",
+    "且不说",
+    "且说",
+    "两者",
+    "个",
+    "个别",
+    "中",
+    "临",
+    "为",
+    "为了",
+    "为什么",
+    "为何",
+    "为止",
+    "为此",
+    "为着",
+    "乃",
+    "乃至",
+    "乃至于",
+    "么",
+    "之",
+    "之一",
+    "之所以",
+    "之类",
+    "乌乎",
+    "乎",
+    "乘",
+    "九",
+    "也",
+    "也好",
+    "也罢",
+    "了",
+    "二",
+    "二来",
+    "于",
+    "于是",
+    "于是乎",
+    "云云",
+    "云尔",
+    "五",
+    "些",
+    "亦",
+    "人",
+    "人们",
+    "人家",
+    "什",
+    "什么",
+    "什么样",
+    "今",
+    "介于",
+    "仍",
+    "仍旧",
+    "从",
+    "从此",
+    "从而",
+    "他",
+    "他人",
+    "他们",
+    "他们们",
+    "以",
+    "以上",
+    "以为",
+    "以便",
+    "以免",
+    "以及",
+    "以故",
+    "以期",
+    "以来",
+    "以至",
+    "以至于",
+    "以致",
+    "们",
+    "任",
+    "任何",
+    "任凭",
+    "会",
+    "似的",
+    "但",
+    "但凡",
+    "但是",
+    "何",
+    "何以",
+    "何况",
+    "何处",
+    "何时",
+    "余外",
+    "作为",
+    "你",
+    "你们",
+    "使",
+    "使得",
+    "例如",
+    "依",
+    "依据",
+    "依照",
+    "便于",
+    "俺",
+    "俺们",
+    "倘",
+    "倘使",
+    "倘或",
+    "倘然",
+    "倘若",
+    "借",
+    "借傥然",
+    "假使",
+    "假如",
+    "假若",
+    "做",
+    "像",
+    "儿",
+    "先不先",
+    "光",
+    "光是",
+    "全体",
+    "全部",
+    "八",
+    "六",
+    "兮",
+    "共",
+    "关于",
+    "关于具体地说",
+    "其",
+    "其一",
+    "其中",
+    "其二",
+    "其他",
+    "其余",
+    "其它",
+    "其次",
+    "具体地说",
+    "具体说来",
+    "兼之",
+    "内",
+    "再",
+    "再其次",
+    "再则",
+    "再有",
+    "再者",
+    "再者说",
+    "再说",
+    "冒",
+    "冲",
+    "况且",
+    "几",
+    "几时",
+    "凡",
+    "凡是",
+    "凭",
+    "凭借",
+    "出于",
+    "出来",
+    "分",
+    "分别",
+    "则",
+    "则甚",
+    "别",
+    "别人",
+    "别处",
+    "别是",
+    "别的",
+    "别管",
+    "别说",
+    "到",
+    "前后",
+    "前此",
+    "前者",
+    "加之",
+    "加以",
+    "区",
+    "即",
+    "即令",
+    "即使",
+    "即便",
+    "即如",
+    "即或",
+    "即若",
+    "却",
+    "去",
+    "又",
+    "又及",
+    "及",
+    "及其",
+    "及至",
+    "反之",
+    "反而",
+    "反过来",
+    "反过来说",
+    "受到",
+    "另",
+    "另一方面",
+    "另外",
+    "另悉",
+    "只",
+    "只当",
+    "只怕",
+    "只是",
+    "只有",
+    "只消",
+    "只要",
+    "只限",
+    "叫",
+    "叮咚",
+    "可",
+    "可以",
+    "可是",
+    "可见",
+    "各",
+    "各个",
+    "各位",
+    "各种",
+    "各自",
+    "同",
+    "同时",
+    "后",
+    "后者",
+    "向",
+    "向使",
+    "向着",
+    "吓",
+    "吗",
+    "否则",
+    "吧",
+    "吧哒",
+    "含",
+    "吱",
+    "呀",
+    "呃",
+    "呕",
+    "呗",
+    "呜",
+    "呜呼",
+    "呢",
+    "呵",
+    "呵呵",
+    "呸",
+    "呼哧",
+    "咋",
+    "和",
+    "咚",
+    "咦",
+    "咧",
+    "咱",
+    "咱们",
+    "咳",
+    "哇",
+    "哈",
+    "哈哈",
+    "哉",
+    "哎",
+    "哎呀",
+    "哎哟",
+    "哗",
+    "哟",
+    "哦",
+    "哩",
+    "哪",
+    "哪个",
+    "哪些",
+    "哪儿",
+    "哪天",
+    "哪年",
+    "哪怕",
+    "哪样",
+    "哪边",
+    "哪里",
+    "哼",
+    "哼唷",
+    "唉",
+    "唯有",
+    "啊",
+    "啐",
+    "啥",
+    "啦",
+    "啪达",
+    "啷当",
+    "喂",
+    "喏",
+    "喔唷",
+    "喽",
+    "嗡",
+    "嗡嗡",
+    "嗬",
+    "嗯",
+    "嗳",
+    "嘎",
+    "嘎登",
+    "嘘",
+    "嘛",
+    "嘻",
+    "嘿",
+    "嘿嘿",
+    "四",
+    "因",
+    "因为",
+    "因了",
+    "因此",
+    "因着",
+    "因而",
+    "固然",
+    "在",
+    "在下",
+    "在于",
+    "地",
+    "基于",
+    "处在",
+    "多",
+    "多么",
+    "多少",
+    "大",
+    "大家",
+    "她",
+    "她们",
+    "好",
+    "如",
+    "如上",
+    "如上所述",
+    "如下",
+    "如何",
+    "如其",
+    "如同",
+    "如是",
+    "如果",
+    "如此",
+    "如若",
+    "始而",
+    "孰料",
+    "孰知",
+    "宁",
+    "宁可",
+    "宁愿",
+    "宁肯",
+    "它",
+    "它们",
+    "对",
+    "对于",
+    "对待",
+    "对方",
+    "对比",
+    "将",
+    "小",
+    "尔",
+    "尔后",
+    "尔尔",
+    "尚且",
+    "就",
+    "就是",
+    "就是了",
+    "就是说",
+    "就算",
+    "就要",
+    "尽",
+    "尽管",
+    "尽管如此",
+    "岂但",
+    "己",
+    "已",
+    "已矣",
+    "巴",
+    "巴巴",
+    "年",
+    "并",
+    "并且",
+    "庶乎",
+    "庶几",
+    "开外",
+    "开始",
+    "归",
+    "归齐",
+    "当",
+    "当地",
+    "当然",
+    "当着",
+    "彼",
+    "彼时",
+    "彼此",
+    "往",
+    "待",
+    "很",
+    "得",
+    "得了",
+    "怎",
+    "怎么",
+    "怎么办",
+    "怎么样",
+    "怎奈",
+    "怎样",
+    "总之",
+    "总的来看",
+    "总的来说",
+    "总的说来",
+    "总而言之",
+    "恰恰相反",
+    "您",
+    "惟其",
+    "慢说",
+    "我",
+    "我们",
+    "或",
+    "或则",
+    "或是",
+    "或曰",
+    "或者",
+    "截至",
+    "所",
+    "所以",
+    "所在",
+    "所幸",
+    "所有",
+    "才",
+    "才能",
+    "打",
+    "打从",
+    "把",
+    "抑或",
+    "拿",
+    "按",
+    "按照",
+    "换句话说",
+    "换言之",
+    "据",
+    "据此",
+    "接着",
+    "故",
+    "故此",
+    "故而",
+    "旁人",
+    "无",
+    "无宁",
+    "无论",
+    "既",
+    "既往",
+    "既是",
+    "既然",
+    "日",
+    "时",
+    "时候",
+    "是",
+    "是以",
+    "是的",
+    "更",
+    "曾",
+    "替",
+    "替代",
+    "最",
+    "月",
+    "有",
+    "有些",
+    "有关",
+    "有及",
+    "有时",
+    "有的",
+    "望",
+    "朝",
+    "朝着",
+    "本",
+    "本人",
+    "本地",
+    "本着",
+    "本身",
+    "来",
+    "来着",
+    "来自",
+    "来说",
+    "极了",
+    "果然",
+    "果真",
+    "某",
+    "某个",
+    "某些",
+    "某某",
+    "根据",
+    "欤",
+    "正值",
+    "正如",
+    "正巧",
+    "正是",
+    "此",
+    "此地",
+    "此处",
+    "此外",
+    "此时",
+    "此次",
+    "此间",
+    "毋宁",
+    "每",
+    "每当",
+    "比",
+    "比及",
+    "比如",
+    "比方",
+    "没奈何",
+    "沿",
+    "沿着",
+    "漫说",
+    "点",
+    "焉",
+    "然则",
+    "然后",
+    "然而",
+    "照",
+    "照着",
+    "犹且",
+    "犹自",
+    "甚且",
+    "甚么",
+    "甚或",
+    "甚而",
+    "甚至",
+    "甚至于",
+    "用",
+    "用来",
+    "由",
+    "由于",
+    "由是",
+    "由此",
+    "由此可见",
+    "的",
+    "的确",
+    "的话",
+    "直到",
+    "相对而言",
+    "省得",
+    "看",
+    "眨眼",
+    "着",
+    "着呢",
+    "矣",
+    "矣乎",
+    "矣哉",
+    "离",
+    "秒",
+    "称",
+    "竟而",
+    "第",
+    "等",
+    "等到",
+    "等等",
+    "简言之",
+    "管",
+    "类如",
+    "紧接着",
+    "纵",
+    "纵令",
+    "纵使",
+    "纵然",
+    "经",
+    "经过",
+    "结果",
+    "给",
+    "继之",
+    "继后",
+    "继而",
+    "综上所述",
+    "罢了",
+    "者",
+    "而",
+    "而且",
+    "而况",
+    "而后",
+    "而外",
+    "而已",
+    "而是",
+    "而言",
+    "能",
+    "能否",
+    "腾",
+    "自",
+    "自个儿",
+    "自从",
+    "自各儿",
+    "自后",
+    "自家",
+    "自己",
+    "自打",
+    "自身",
+    "至",
+    "至于",
+    "至今",
+    "至若",
+    "致",
+    "般的",
+    "若",
+    "若夫",
+    "若是",
+    "若果",
+    "若非",
+    "莫不然",
+    "莫如",
+    "莫若",
+    "虽",
+    "虽则",
+    "虽然",
+    "虽说",
+    "被",
+    "要",
+    "要不",
+    "要不是",
+    "要不然",
+    "要么",
+    "要是",
+    "譬喻",
+    "譬如",
+    "让",
+    "许多",
+    "论",
+    "设使",
+    "设或",
+    "设若",
+    "诚如",
+    "诚然",
+    "该",
+    "说",
+    "说来",
+    "请",
+    "诸",
+    "诸位",
+    "诸如",
+    "谁",
+    "谁人",
+    "谁料",
+    "谁知",
+    "贼死",
+    "赖以",
+    "赶",
+    "起",
+    "起见",
+    "趁",
+    "趁着",
+    "越是",
+    "距",
+    "跟",
+    "较",
+    "较之",
+    "边",
+    "过",
+    "还",
+    "还是",
+    "还有",
+    "还要",
+    "这",
+    "这一来",
+    "这个",
+    "这么",
+    "这么些",
+    "这么样",
+    "这么点儿",
+    "这些",
+    "这会儿",
+    "这儿",
+    "这就是说",
+    "这时",
+    "这样",
+    "这次",
+    "这般",
+    "这边",
+    "这里",
+    "进而",
+    "连",
+    "连同",
+    "逐步",
+    "通过",
+    "遵循",
+    "遵照",
+    "那",
+    "那个",
+    "那么",
+    "那么些",
+    "那么样",
+    "那些",
+    "那会儿",
+    "那儿",
+    "那时",
+    "那样",
+    "那般",
+    "那边",
+    "那里",
+    "都",
+    "鄙人",
+    "鉴于",
+    "针对",
+    "阿",
+    "除",
+    "除了",
+    "除外",
+    "除开",
+    "除此之外",
+    "除非",
+    "随",
+    "随后",
+    "随时",
+    "随着",
+    "难道说",
+    "零",
+    "非",
+    "非但",
+    "非徒",
+    "非特",
+    "非独",
+    "靠",
+    "顺",
+    "顺着",
+    "首先",
+    "︿",
+    "！",
+    "＃",
+    "＄",
+    "％",
+    "＆",
+    "（",
+    "）",
+    "＊",
+    "＋",
+    "，",
+    "０",
+    "１",
+    "２",
+    "３",
+    "４",
+    "５",
+    "６",
+    "７",
+    "８",
+    "９",
+    "：",
+    "；",
+    "＜",
+    "＞",
+    "？",
+    "＠",
+    "［",
+    "］",
+    "｛",
+    "｜",
+    "｝",
+    "～",
+    "￥",
+    "あそこ",
+    "あっ",
+    "あの",
+    "あのかた",
+    "あの人",
+    "あり",
+    "あります",
+    "ある",
+    "あれ",
+    "い",
+    "いう",
+    "います",
+    "いる",
+    "う",
+    "うち",
+    "え",
+    "お",
+    "および",
+    "おり",
+    "おります",
+    "か",
+    "かつて",
+    "から",
+    "が",
+    "き",
+    "ここ",
+    "こちら",
+    "こと",
+    "この",
+    "これ",
+    "これら",
+    "さ",
+    "さらに",
+    "し",
+    "しかし",
+    "する",
+    "ず",
+    "せ",
+    "せる",
+    "そこ",
+    "そして",
+    "その",
+    "その他",
+    "その後",
+    "それ",
+    "それぞれ",
+    "それで",
+    "た",
+    "ただし",
+    "たち",
+    "ため",
+    "たり",
+    "だ",
+    "だっ",
+    "だれ",
+    "つ",
+    "て",
+    "で",
+    "でき",
+    "できる",
+    "です",
+    "では",
+    "でも",
+    "と",
+    "という",
+    "といった",
+    "とき",
+    "ところ",
+    "として",
+    "とともに",
+    "とも",
+    "と共に",
+    "どこ",
+    "どの",
+    "な",
+    "ない",
+    "なお",
+    "なかっ",
+    "ながら",
+    "なく",
+    "なっ",
+    "など",
+    "なに",
+    "なら",
+    "なり",
+    "なる",
+    "なん",
+    "に",
+    "において",
+    "における",
+    "について",
+    "にて",
+    "によって",
+    "により",
+    "による",
+    "に対して",
+    "に対する",
+    "に関する",
+    "の",
+    "ので",
+    "のみ",
+    "は",
+    "ば",
+    "へ",
+    "ほか",
+    "ほとんど",
+    "ほど",
+    "ます",
+    "また",
+    "または",
+    "まで",
+    "も",
+    "もの",
+    "ものの",
+    "や",
+    "よう",
+    "より",
+    "ら",
+    "られ",
+    "られる",
+    "れ",
+    "れる",
+    "を",
+    "ん",
+    "及び",
+    "彼女",
+    "我々",
+    "特に",
+    "私",
+    "私達",
+    "貴方",
+    "貴方方",
+    "'ll",
+    "'tis",
+    "'twas",
+    "'ve",
+    "10",
+    "39",
+    "a",
+    "a's",
+    "able",
+    "ableabout",
+    "about",
+    "above",
+    "abroad",
+    "abst",
+    "accordance",
+    "according",
+    "accordingly",
+    "across",
+    "act",
+    "actually",
+    "ad",
+    "added",
+    "adj",
+    "adopted",
+    "ae",
+    "af",
+    "affected",
+    "affecting",
+    "affects",
+    "after",
+    "afterwards",
+    "ag",
+    "again",
+    "against",
+    "ago",
+    "ah",
+    "ahead",
+    "ai",
+    "ain't",
+    "aint",
+    "al",
+    "all",
+    "allow",
+    "allows",
+    "almost",
+    "alone",
+    "along",
+    "alongside",
+    "already",
+    "also",
+    "although",
+    "always",
+    "am",
+    "amid",
+    "amidst",
+    "among",
+    "amongst",
+    "amoungst",
+    "amount",
+    "an",
+    "and",
+    "announce",
+    "another",
+    "any",
+    "anybody",
+    "anyhow",
+    "anymore",
+    "anyone",
+    "anything",
+    "anyway",
+    "anyways",
+    "anywhere",
+    "ao",
+    "apart",
+    "apparently",
+    "appear",
+    "appreciate",
+    "appropriate",
+    "approximately",
+    "aq",
+    "ar",
+    "are",
+    "area",
+    "areas",
+    "aren",
+    "aren't",
+    "arent",
+    "arise",
+    "around",
+    "arpa",
+    "as",
+    "aside",
+    "ask",
+    "asked",
+    "asking",
+    "asks",
+    "associated",
+    "at",
+    "au",
+    "auth",
+    "available",
+    "aw",
+    "away",
+    "awfully",
+    "az",
+    "b",
+    "ba",
+    "back",
+    "backed",
+    "backing",
+    "backs",
+    "backward",
+    "backwards",
+    "bb",
+    "bd",
+    "be",
+    "became",
+    "because",
+    "become",
+    "becomes",
+    "becoming",
+    "been",
+    "before",
+    "beforehand",
+    "began",
+    "begin",
+    "beginning",
+    "beginnings",
+    "begins",
+    "behind",
+    "being",
+    "beings",
+    "believe",
+    "below",
+    "beside",
+    "besides",
+    "best",
+    "better",
+    "between",
+    "beyond",
+    "bf",
+    "bg",
+    "bh",
+    "bi",
+    "big",
+    "bill",
+    "billion",
+    "biol",
+    "bj",
+    "bm",
+    "bn",
+    "bo",
+    "both",
+    "bottom",
+    "br",
+    "brief",
+    "briefly",
+    "bs",
+    "bt",
+    "but",
+    "buy",
+    "bv",
+    "bw",
+    "by",
+    "bz",
+    "c",
+    "c'mon",
+    "c's",
+    "ca",
+    "call",
+    "came",
+    "can",
+    "can't",
+    "cannot",
+    "cant",
+    "caption",
+    "case",
+    "cases",
+    "cause",
+    "causes",
+    "cc",
+    "cd",
+    "certain",
+    "certainly",
+    "cf",
+    "cg",
+    "ch",
+    "changes",
+    "ci",
+    "ck",
+    "cl",
+    "clear",
+    "clearly",
+    "click",
+    "cm",
+    "cmon",
+    "cn",
+    "co",
+    "co.",
+    "com",
+    "come",
+    "comes",
+    "computer",
+    "con",
+    "concerning",
+    "consequently",
+    "consider",
+    "considering",
+    "contain",
+    "containing",
+    "contains",
+    "copy",
+    "corresponding",
+    "could",
+    "could've",
+    "couldn",
+    "couldn't",
+    "couldnt",
+    "course",
+    "cr",
+    "cry",
+    "cs",
+    "cu",
+    "currently",
+    "cv",
+    "cx",
+    "cy",
+    "cz",
+    "d",
+    "dare",
+    "daren't",
+    "darent",
+    "date",
+    "de",
+    "dear",
+    "definitely",
+    "describe",
+    "described",
+    "despite",
+    "detail",
+    "did",
+    "didn",
+    "didn't",
+    "didnt",
+    "differ",
+    "different",
+    "differently",
+    "directly",
+    "dj",
+    "dk",
+    "dm",
+    "do",
+    "does",
+    "doesn",
+    "doesn't",
+    "doesnt",
+    "doing",
+    "don",
+    "don't",
+    "done",
+    "dont",
+    "doubtful",
+    "down",
+    "downed",
+    "downing",
+    "downs",
+    "downwards",
+    "due",
+    "during",
+    "dz",
+    "e",
+    "each",
+    "early",
+    "ec",
+    "ed",
+    "edu",
+    "ee",
+    "effect",
+    "eg",
+    "eh",
+    "eight",
+    "eighty",
+    "either",
+    "eleven",
+    "else",
+    "elsewhere",
+    "empty",
+    "end",
+    "ended",
+    "ending",
+    "ends",
+    "enough",
+    "entirely",
+    "er",
+    "es",
+    "especially",
+    "et",
+    "et-al",
+    "etc",
+    "even",
+    "evenly",
+    "ever",
+    "evermore",
+    "every",
+    "everybody",
+    "everyone",
+    "everything",
+    "everywhere",
+    "ex",
+    "exactly",
+    "example",
+    "except",
+    "f",
+    "face",
+    "faces",
+    "fact",
+    "facts",
+    "fairly",
+    "far",
+    "farther",
+    "felt",
+    "few",
+    "fewer",
+    "ff",
+    "fi",
+    "fifteen",
+    "fifth",
+    "fifty",
+    "fify",
+    "fill",
+    "find",
+    "finds",
+    "fire",
+    "first",
+    "five",
+    "fix",
+    "fj",
+    "fk",
+    "fm",
+    "fo",
+    "followed",
+    "following",
+    "follows",
+    "for",
+    "forever",
+    "former",
+    "formerly",
+    "forth",
+    "forty",
+    "forward",
+    "found",
+    "four",
+    "fr",
+    "free",
+    "from",
+    "front",
+    "full",
+    "fully",
+    "further",
+    "furthered",
+    "furthering",
+    "furthermore",
+    "furthers",
+    "fx",
+    "g",
+    "ga",
+    "gave",
+    "gb",
+    "gd",
+    "ge",
+    "general",
+    "generally",
+    "get",
+    "gets",
+    "getting",
+    "gf",
+    "gg",
+    "gh",
+    "gi",
+    "give",
+    "given",
+    "gives",
+    "giving",
+    "gl",
+    "gm",
+    "gmt",
+    "gn",
+    "go",
+    "goes",
+    "going",
+    "gone",
+    "good",
+    "goods",
+    "got",
+    "gotten",
+    "gov",
+    "gp",
+    "gq",
+    "gr",
+    "great",
+    "greater",
+    "greatest",
+    "greetings",
+    "group",
+    "grouped",
+    "grouping",
+    "groups",
+    "gs",
+    "gt",
+    "gu",
+    "gw",
+    "gy",
+    "h",
+    "had",
+    "hadn't",
+    "hadnt",
+    "half",
+    "happens",
+    "hardly",
+    "has",
+    "hasn",
+    "hasn't",
+    "hasnt",
+    "have",
+    "haven",
+    "haven't",
+    "havent",
+    "having",
+    "he",
+    "he'd",
+    "he'll",
+    "he's",
+    "hed",
+    "hell",
+    "hello",
+    "help",
+    "hence",
+    "her",
+    "here",
+    "here's",
+    "hereafter",
+    "hereby",
+    "herein",
+    "heres",
+    "hereupon",
+    "hers",
+    "herself",
+    "herse”",
+    "hes",
+    "hi",
+    "hid",
+    "high",
+    "higher",
+    "highest",
+    "him",
+    "himself",
+    "himse”",
+    "his",
+    "hither",
+    "hk",
+    "hm",
+    "hn",
+    "home",
+    "homepage",
+    "hopefully",
+    "how",
+    "how'd",
+    "how'll",
+    "how's",
+    "howbeit",
+    "however",
+    "hr",
+    "ht",
+    "htm",
+    "html",
+    "http",
+    "hu",
+    "hundred",
+    "i",
+    "i'd",
+    "i'll",
+    "i'm",
+    "i've",
+    "i.e.",
+    "id",
+    "ie",
+    "if",
+    "ignored",
+    "ii",
+    "il",
+    "ill",
+    "im",
+    "immediate",
+    "immediately",
+    "importance",
+    "important",
+    "in",
+    "inasmuch",
+    "inc",
+    "inc.",
+    "indeed",
+    "index",
+    "indicate",
+    "indicated",
+    "indicates",
+    "information",
+    "inner",
+    "inside",
+    "insofar",
+    "instead",
+    "int",
+    "interest",
+    "interested",
+    "interesting",
+    "interests",
+    "into",
+    "invention",
+    "inward",
+    "io",
+    "iq",
+    "ir",
+    "is",
+    "isn",
+    "isn't",
+    "isnt",
+    "it",
+    "it'd",
+    "it'll",
+    "it's",
+    "itd",
+    "itll",
+    "its",
+    "itself",
+    "itse”",
+    "ive",
+    "j",
+    "je",
+    "jm",
+    "jo",
+    "join",
+    "jp",
+    "just",
+    "k",
+    "ke",
+    "keep",
+    "keeps",
+    "kept",
+    "keys",
+    "kg",
+    "kh",
+    "ki",
+    "kind",
+    "km",
+    "kn",
+    "knew",
+    "know",
+    "known",
+    "knows",
+    "kp",
+    "kr",
+    "kw",
+    "ky",
+    "kz",
+    "l",
+    "la",
+    "large",
+    "largely",
+    "last",
+    "lately",
+    "later",
+    "latest",
+    "latter",
+    "latterly",
+    "lb",
+    "lc",
+    "least",
+    "length",
+    "less",
+    "lest",
+    "let",
+    "let's",
+    "lets",
+    "li",
+    "like",
+    "liked",
+    "likely",
+    "likewise",
+    "line",
+    "little",
+    "lk",
+    "ll",
+    "long",
+    "longer",
+    "longest",
+    "look",
+    "looking",
+    "looks",
+    "low",
+    "lower",
+    "lr",
+    "ls",
+    "lt",
+    "ltd",
+    "lu",
+    "lv",
+    "ly",
+    "m",
+    "ma",
+    "made",
+    "mainly",
+    "make",
+    "makes",
+    "making",
+    "man",
+    "many",
+    "may",
+    "maybe",
+    "mayn't",
+    "maynt",
+    "mc",
+    "md",
+    "me",
+    "mean",
+    "means",
+    "meantime",
+    "meanwhile",
+    "member",
+    "members",
+    "men",
+    "merely",
+    "mg",
+    "mh",
+    "microsoft",
+    "might",
+    "might've",
+    "mightn't",
+    "mightnt",
+    "mil",
+    "mill",
+    "million",
+    "mine",
+    "minus",
+    "miss",
+    "mk",
+    "ml",
+    "mm",
+    "mn",
+    "mo",
+    "more",
+    "moreover",
+    "most",
+    "mostly",
+    "move",
+    "mp",
+    "mq",
+    "mr",
+    "mrs",
+    "ms",
+    "msie",
+    "mt",
+    "mu",
+    "much",
+    "mug",
+    "must",
+    "must've",
+    "mustn't",
+    "mustnt",
+    "mv",
+    "mw",
+    "mx",
+    "my",
+    "myself",
+    "myse”",
+    "mz",
+    "n",
+    "na",
+    "name",
+    "namely",
+    "nay",
+    "nc",
+    "nd",
+    "ne",
+    "near",
+    "nearly",
+    "necessarily",
+    "necessary",
+    "need",
+    "needed",
+    "needing",
+    "needn't",
+    "neednt",
+    "needs",
+    "neither",
+    "net",
+    "netscape",
+    "never",
+    "neverf",
+    "neverless",
+    "nevertheless",
+    "new",
+    "newer",
+    "newest",
+    "next",
+    "nf",
+    "ng",
+    "ni",
+    "nine",
+    "ninety",
+    "nl",
+    "no",
+    "no-one",
+    "nobody",
+    "non",
+    "none",
+    "nonetheless",
+    "noone",
+    "nor",
+    "normally",
+    "nos",
+    "not",
+    "noted",
+    "nothing",
+    "notwithstanding",
+    "novel",
+    "now",
+    "nowhere",
+    "np",
+    "nr",
+    "nu",
+    "null",
+    "number",
+    "numbers",
+    "nz",
+    "o",
+    "obtain",
+    "obtained",
+    "obviously",
+    "of",
+    "off",
+    "often",
+    "oh",
+    "ok",
+    "okay",
+    "old",
+    "older",
+    "oldest",
+    "om",
+    "omitted",
+    "on",
+    "once",
+    "one",
+    "one's",
+    "ones",
+    "only",
+    "onto",
+    "open",
+    "opened",
+    "opening",
+    "opens",
+    "opposite",
+    "or",
+    "ord",
+    "order",
+    "ordered",
+    "ordering",
+    "orders",
+    "org",
+    "other",
+    "others",
+    "otherwise",
+    "ought",
+    "oughtn't",
+    "oughtnt",
+    "our",
+    "ours",
+    "ourselves",
+    "out",
+    "outside",
+    "over",
+    "overall",
+    "owing",
+    "own",
+    "p",
+    "pa",
+    "page",
+    "pages",
+    "part",
+    "parted",
+    "particular",
+    "particularly",
+    "parting",
+    "parts",
+    "past",
+    "pe",
+    "per",
+    "perhaps",
+    "pf",
+    "pg",
+    "ph",
+    "pk",
+    "pl",
+    "place",
+    "placed",
+    "places",
+    "please",
+    "plus",
+    "pm",
+    "pmid",
+    "pn",
+    "point",
+    "pointed",
+    "pointing",
+    "points",
+    "poorly",
+    "possible",
+    "possibly",
+    "potentially",
+    "pp",
+    "pr",
+    "predominantly",
+    "present",
+    "presented",
+    "presenting",
+    "presents",
+    "presumably",
+    "previously",
+    "primarily",
+    "probably",
+    "problem",
+    "problems",
+    "promptly",
+    "proud",
+    "provided",
+    "provides",
+    "pt",
+    "put",
+    "puts",
+    "pw",
+    "py",
+    "q",
+    "qa",
+    "que",
+    "quickly",
+    "quite",
+    "qv",
+    "r",
+    "ran",
+    "rather",
+    "rd",
+    "re",
+    "readily",
+    "really",
+    "reasonably",
+    "recent",
+    "recently",
+    "ref",
+    "refs",
+    "regarding",
+    "regardless",
+    "regards",
+    "related",
+    "relatively",
+    "research",
+    "reserved",
+    "respectively",
+    "resulted",
+    "resulting",
+    "results",
+    "right",
+    "ring",
+    "ro",
+    "room",
+    "rooms",
+    "round",
+    "ru",
+    "run",
+    "rw",
+    "s",
+    "sa",
+    "said",
+    "same",
+    "saw",
+    "say",
+    "saying",
+    "says",
+    "sb",
+    "sc",
+    "sd",
+    "se",
+    "sec",
+    "second",
+    "secondly",
+    "seconds",
+    "section",
+    "see",
+    "seeing",
+    "seem",
+    "seemed",
+    "seeming",
+    "seems",
+    "seen",
+    "sees",
+    "self",
+    "selves",
+    "sensible",
+    "sent",
+    "serious",
+    "seriously",
+    "seven",
+    "seventy",
+    "several",
+    "sg",
+    "sh",
+    "shall",
+    "shan't",
+    "shant",
+    "she",
+    "she'd",
+    "she'll",
+    "she's",
+    "shed",
+    "shell",
+    "shes",
+    "should",
+    "should've",
+    "shouldn",
+    "shouldn't",
+    "shouldnt",
+    "show",
+    "showed",
+    "showing",
+    "shown",
+    "showns",
+    "shows",
+    "si",
+    "side",
+    "sides",
+    "significant",
+    "significantly",
+    "similar",
+    "similarly",
+    "since",
+    "sincere",
+    "site",
+    "six",
+    "sixty",
+    "sj",
+    "sk",
+    "sl",
+    "slightly",
+    "sm",
+    "small",
+    "smaller",
+    "smallest",
+    "sn",
+    "so",
+    "some",
+    "somebody",
+    "someday",
+    "somehow",
+    "someone",
+    "somethan",
+    "something",
+    "sometime",
+    "sometimes",
+    "somewhat",
+    "somewhere",
+    "soon",
+    "sorry",
+    "specifically",
+    "specified",
+    "specify",
+    "specifying",
+    "sr",
+    "st",
+    "state",
+    "states",
+    "still",
+    "stop",
+    "strongly",
+    "su",
+    "sub",
+    "substantially",
+    "successfully",
+    "such",
+    "sufficiently",
+    "suggest",
+    "sup",
+    "sure",
+    "sv",
+    "sy",
+    "system",
+    "sz",
+    "t",
+    "t's",
+    "take",
+    "taken",
+    "taking",
+    "tc",
+    "td",
+    "tell",
+    "ten",
+    "tends",
+    "test",
+    "text",
+    "tf",
+    "tg",
+    "th",
+    "than",
+    "thank",
+    "thanks",
+    "thanx",
+    "that",
+    "that'll",
+    "that's",
+    "that've",
+    "thatll",
+    "thats",
+    "thatve",
+    "the",
+    "their",
+    "theirs",
+    "them",
+    "themselves",
+    "then",
+    "thence",
+    "there",
+    "there'd",
+    "there'll",
+    "there're",
+    "there's",
+    "there've",
+    "thereafter",
+    "thereby",
+    "thered",
+    "therefore",
+    "therein",
+    "therell",
+    "thereof",
+    "therere",
+    "theres",
+    "thereto",
+    "thereupon",
+    "thereve",
+    "these",
+    "they",
+    "they'd",
+    "they'll",
+    "they're",
+    "they've",
+    "theyd",
+    "theyll",
+    "theyre",
+    "theyve",
+    "thick",
+    "thin",
+    "thing",
+    "things",
+    "think",
+    "thinks",
+    "third",
+    "thirty",
+    "this",
+    "thorough",
+    "thoroughly",
+    "those",
+    "thou",
+    "though",
+    "thoughh",
+    "thought",
+    "thoughts",
+    "thousand",
+    "three",
+    "throug",
+    "through",
+    "throughout",
+    "thru",
+    "thus",
+    "til",
+    "till",
+    "tip",
+    "tis",
+    "tj",
+    "tk",
+    "tm",
+    "tn",
+    "to",
+    "today",
+    "together",
+    "too",
+    "took",
+    "top",
+    "toward",
+    "towards",
+    "tp",
+    "tr",
+    "tried",
+    "tries",
+    "trillion",
+    "truly",
+    "try",
+    "trying",
+    "ts",
+    "tt",
+    "turn",
+    "turned",
+    "turning",
+    "turns",
+    "tv",
+    "tw",
+    "twas",
+    "twelve",
+    "twenty",
+    "twice",
+    "two",
+    "tz",
+    "u",
+    "ua",
+    "ug",
+    "uk",
+    "um",
+    "un",
+    "under",
+    "underneath",
+    "undoing",
+    "unfortunately",
+    "unless",
+    "unlike",
+    "unlikely",
+    "until",
+    "unto",
+    "up",
+    "upon",
+    "ups",
+    "upwards",
+    "us",
+    "use",
+    "used",
+    "useful",
+    "usefully",
+    "usefulness",
+    "uses",
+    "using",
+    "usually",
+    "uucp",
+    "uy",
+    "uz",
+    "v",
+    "va",
+    "value",
+    "various",
+    "vc",
+    "ve",
+    "versus",
+    "very",
+    "vg",
+    "vi",
+    "via",
+    "viz",
+    "vn",
+    "vol",
+    "vols",
+    "vs",
+    "vu",
+    "w",
+    "want",
+    "wanted",
+    "wanting",
+    "wants",
+    "was",
+    "wasn",
+    "wasn't",
+    "wasnt",
+    "way",
+    "ways",
+    "we",
+    "we'd",
+    "we'll",
+    "we're",
+    "we've",
+    "web",
+    "webpage",
+    "website",
+    "wed",
+    "welcome",
+    "well",
+    "wells",
+    "went",
+    "were",
+    "weren",
+    "weren't",
+    "werent",
+    "weve",
+    "wf",
+    "what",
+    "what'd",
+    "what'll",
+    "what's",
+    "what've",
+    "whatever",
+    "whatll",
+    "whats",
+    "whatve",
+    "when",
+    "when'd",
+    "when'll",
+    "when's",
+    "whence",
+    "whenever",
+    "where",
+    "where'd",
+    "where'll",
+    "where's",
+    "whereafter",
+    "whereas",
+    "whereby",
+    "wherein",
+    "wheres",
+    "whereupon",
+    "wherever",
+    "whether",
+    "which",
+    "whichever",
+    "while",
+    "whilst",
+    "whim",
+    "whither",
+    "who",
+    "who'd",
+    "who'll",
+    "who's",
+    "whod",
+    "whoever",
+    "whole",
+    "wholl",
+    "whom",
+    "whomever",
+    "whos",
+    "whose",
+    "why",
+    "why'd",
+    "why'll",
+    "why's",
+    "widely",
+    "width",
+    "will",
+    "willing",
+    "wish",
+    "with",
+    "within",
+    "without",
+    "won",
+    "won't",
+    "wonder",
+    "wont",
+    "words",
+    "work",
+    "worked",
+    "working",
+    "works",
+    "world",
+    "would",
+    "would've",
+    "wouldn",
+    "wouldn't",
+    "wouldnt",
+    "ws",
+    "www",
+    "x",
+    "y",
+    "ye",
+    "year",
+    "years",
+    "yes",
+    "yet",
+    "you",
+    "you'd",
+    "you'll",
+    "you're",
+    "you've",
+    "youd",
+    "youll",
+    "young",
+    "younger",
+    "youngest",
+    "your",
+    "youre",
+    "yours",
+    "yourself",
+    "yourselves",
+    "youve",
+    "yt",
+    "yu",
+    "z",
+    "za",
+    "zero",
+    "zm",
+    "zr"
+];
diff --git a/modules/story-summary/vector/utils/stopwords-data/LICENSE.stopwords-iso.txt b/modules/story-summary/vector/utils/stopwords-data/LICENSE.stopwords-iso.txt
new file mode 100644
index 0000000..0076d3c
--- /dev/null
+++ b/modules/story-summary/vector/utils/stopwords-data/LICENSE.stopwords-iso.txt
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2020 Gene Diaz
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/modules/story-summary/vector/utils/stopwords-data/SOURCES.md b/modules/story-summary/vector/utils/stopwords-data/SOURCES.md
new file mode 100644
index 0000000..1402c7e
--- /dev/null
+++ b/modules/story-summary/vector/utils/stopwords-data/SOURCES.md
@@ -0,0 +1,15 @@
+# stopwords sources for story-summary
+
+- Dataset: `stopwords-iso` (npm package, version 1.1.0)
+- Repository: https://github.com/stopwords-iso/stopwords-iso
+- License: MIT
+- Snapshot date: 2026-02-16
+- Languages used: `zh`, `ja`, `en`
+- Local snapshot files:
+  - `stopwords-iso.zh.txt`
+  - `stopwords-iso.ja.txt`
+  - `stopwords-iso.en.txt`
+
+Generation note:
+- `modules/story-summary/vector/utils/stopwords-base.js` is generated from these snapshot files.
+- Keep `stopwords-patch.js` for tiny domain overrides only.
diff --git a/modules/story-summary/vector/utils/stopwords-data/stopwords-iso.en.txt b/modules/story-summary/vector/utils/stopwords-data/stopwords-iso.en.txt
new file mode 100644
index 0000000..0efb051
--- /dev/null
+++ b/modules/story-summary/vector/utils/stopwords-data/stopwords-iso.en.txt
@@ -0,0 +1,1298 @@
+'ll
+'tis
+'twas
+'ve
+10
+39
+a
+a's
+able
+ableabout
+about
+above
+abroad
+abst
+accordance
+according
+accordingly
+across
+act
+actually
+ad
+added
+adj
+adopted
+ae
+af
+affected
+affecting
+affects
+after
+afterwards
+ag
+again
+against
+ago
+ah
+ahead
+ai
+ain't
+aint
+al
+all
+allow
+allows
+almost
+alone
+along
+alongside
+already
+also
+although
+always
+am
+amid
+amidst
+among
+amongst
+amoungst
+amount
+an
+and
+announce
+another
+any
+anybody
+anyhow
+anymore
+anyone
+anything
+anyway
+anyways
+anywhere
+ao
+apart
+apparently
+appear
+appreciate
+appropriate
+approximately
+aq
+ar
+are
+area
+areas
+aren
+aren't
+arent
+arise
+around
+arpa
+as
+aside
+ask
+asked
+asking
+asks
+associated
+at
+au
+auth
+available
+aw
+away
+awfully
+az
+b
+ba
+back
+backed
+backing
+backs
+backward
+backwards
+bb
+bd
+be
+became
+because
+become
+becomes
+becoming
+been
+before
+beforehand
+began
+begin
+beginning
+beginnings
+begins
+behind
+being
+beings
+believe
+below
+beside
+besides
+best
+better
+between
+beyond
+bf
+bg
+bh
+bi
+big
+bill
+billion
+biol
+bj
+bm
+bn
+bo
+both
+bottom
+br
+brief
+briefly
+bs
+bt
+but
+buy
+bv
+bw
+by
+bz
+c
+c'mon
+c's
+ca
+call
+came
+can
+can't
+cannot
+cant
+caption
+case
+cases
+cause
+causes
+cc
+cd
+certain
+certainly
+cf
+cg
+ch
+changes
+ci
+ck
+cl
+clear
+clearly
+click
+cm
+cmon
+cn
+co
+co.
+com
+come
+comes
+computer
+con
+concerning
+consequently
+consider
+considering
+contain
+containing
+contains
+copy
+corresponding
+could
+could've
+couldn
+couldn't
+couldnt
+course
+cr
+cry
+cs
+cu
+currently
+cv
+cx
+cy
+cz
+d
+dare
+daren't
+darent
+date
+de
+dear
+definitely
+describe
+described
+despite
+detail
+did
+didn
+didn't
+didnt
+differ
+different
+differently
+directly
+dj
+dk
+dm
+do
+does
+doesn
+doesn't
+doesnt
+doing
+don
+don't
+done
+dont
+doubtful
+down
+downed
+downing
+downs
+downwards
+due
+during
+dz
+e
+each
+early
+ec
+ed
+edu
+ee
+effect
+eg
+eh
+eight
+eighty
+either
+eleven
+else
+elsewhere
+empty
+end
+ended
+ending
+ends
+enough
+entirely
+er
+es
+especially
+et
+et-al
+etc
+even
+evenly
+ever
+evermore
+every
+everybody
+everyone
+everything
+everywhere
+ex
+exactly
+example
+except
+f
+face
+faces
+fact
+facts
+fairly
+far
+farther
+felt
+few
+fewer
+ff
+fi
+fifteen
+fifth
+fifty
+fify
+fill
+find
+finds
+fire
+first
+five
+fix
+fj
+fk
+fm
+fo
+followed
+following
+follows
+for
+forever
+former
+formerly
+forth
+forty
+forward
+found
+four
+fr
+free
+from
+front
+full
+fully
+further
+furthered
+furthering
+furthermore
+furthers
+fx
+g
+ga
+gave
+gb
+gd
+ge
+general
+generally
+get
+gets
+getting
+gf
+gg
+gh
+gi
+give
+given
+gives
+giving
+gl
+gm
+gmt
+gn
+go
+goes
+going
+gone
+good
+goods
+got
+gotten
+gov
+gp
+gq
+gr
+great
+greater
+greatest
+greetings
+group
+grouped
+grouping
+groups
+gs
+gt
+gu
+gw
+gy
+h
+had
+hadn't
+hadnt
+half
+happens
+hardly
+has
+hasn
+hasn't
+hasnt
+have
+haven
+haven't
+havent
+having
+he
+he'd
+he'll
+he's
+hed
+hell
+hello
+help
+hence
+her
+here
+here's
+hereafter
+hereby
+herein
+heres
+hereupon
+hers
+herself
+herse”
+hes
+hi
+hid
+high
+higher
+highest
+him
+himself
+himse”
+his
+hither
+hk
+hm
+hn
+home
+homepage
+hopefully
+how
+how'd
+how'll
+how's
+howbeit
+however
+hr
+ht
+htm
+html
+http
+hu
+hundred
+i
+i'd
+i'll
+i'm
+i've
+i.e.
+id
+ie
+if
+ignored
+ii
+il
+ill
+im
+immediate
+immediately
+importance
+important
+in
+inasmuch
+inc
+inc.
+indeed
+index
+indicate
+indicated
+indicates
+information
+inner
+inside
+insofar
+instead
+int
+interest
+interested
+interesting
+interests
+into
+invention
+inward
+io
+iq
+ir
+is
+isn
+isn't
+isnt
+it
+it'd
+it'll
+it's
+itd
+itll
+its
+itself
+itse”
+ive
+j
+je
+jm
+jo
+join
+jp
+just
+k
+ke
+keep
+keeps
+kept
+keys
+kg
+kh
+ki
+kind
+km
+kn
+knew
+know
+known
+knows
+kp
+kr
+kw
+ky
+kz
+l
+la
+large
+largely
+last
+lately
+later
+latest
+latter
+latterly
+lb
+lc
+least
+length
+less
+lest
+let
+let's
+lets
+li
+like
+liked
+likely
+likewise
+line
+little
+lk
+ll
+long
+longer
+longest
+look
+looking
+looks
+low
+lower
+lr
+ls
+lt
+ltd
+lu
+lv
+ly
+m
+ma
+made
+mainly
+make
+makes
+making
+man
+many
+may
+maybe
+mayn't
+maynt
+mc
+md
+me
+mean
+means
+meantime
+meanwhile
+member
+members
+men
+merely
+mg
+mh
+microsoft
+might
+might've
+mightn't
+mightnt
+mil
+mill
+million
+mine
+minus
+miss
+mk
+ml
+mm
+mn
+mo
+more
+moreover
+most
+mostly
+move
+mp
+mq
+mr
+mrs
+ms
+msie
+mt
+mu
+much
+mug
+must
+must've
+mustn't
+mustnt
+mv
+mw
+mx
+my
+myself
+myse”
+mz
+n
+na
+name
+namely
+nay
+nc
+nd
+ne
+near
+nearly
+necessarily
+necessary
+need
+needed
+needing
+needn't
+neednt
+needs
+neither
+net
+netscape
+never
+neverf
+neverless
+nevertheless
+new
+newer
+newest
+next
+nf
+ng
+ni
+nine
+ninety
+nl
+no
+no-one
+nobody
+non
+none
+nonetheless
+noone
+nor
+normally
+nos
+not
+noted
+nothing
+notwithstanding
+novel
+now
+nowhere
+np
+nr
+nu
+null
+number
+numbers
+nz
+o
+obtain
+obtained
+obviously
+of
+off
+often
+oh
+ok
+okay
+old
+older
+oldest
+om
+omitted
+on
+once
+one
+one's
+ones
+only
+onto
+open
+opened
+opening
+opens
+opposite
+or
+ord
+order
+ordered
+ordering
+orders
+org
+other
+others
+otherwise
+ought
+oughtn't
+oughtnt
+our
+ours
+ourselves
+out
+outside
+over
+overall
+owing
+own
+p
+pa
+page
+pages
+part
+parted
+particular
+particularly
+parting
+parts
+past
+pe
+per
+perhaps
+pf
+pg
+ph
+pk
+pl
+place
+placed
+places
+please
+plus
+pm
+pmid
+pn
+point
+pointed
+pointing
+points
+poorly
+possible
+possibly
+potentially
+pp
+pr
+predominantly
+present
+presented
+presenting
+presents
+presumably
+previously
+primarily
+probably
+problem
+problems
+promptly
+proud
+provided
+provides
+pt
+put
+puts
+pw
+py
+q
+qa
+que
+quickly
+quite
+qv
+r
+ran
+rather
+rd
+re
+readily
+really
+reasonably
+recent
+recently
+ref
+refs
+regarding
+regardless
+regards
+related
+relatively
+research
+reserved
+respectively
+resulted
+resulting
+results
+right
+ring
+ro
+room
+rooms
+round
+ru
+run
+rw
+s
+sa
+said
+same
+saw
+say
+saying
+says
+sb
+sc
+sd
+se
+sec
+second
+secondly
+seconds
+section
+see
+seeing
+seem
+seemed
+seeming
+seems
+seen
+sees
+self
+selves
+sensible
+sent
+serious
+seriously
+seven
+seventy
+several
+sg
+sh
+shall
+shan't
+shant
+she
+she'd
+she'll
+she's
+shed
+shell
+shes
+should
+should've
+shouldn
+shouldn't
+shouldnt
+show
+showed
+showing
+shown
+showns
+shows
+si
+side
+sides
+significant
+significantly
+similar
+similarly
+since
+sincere
+site
+six
+sixty
+sj
+sk
+sl
+slightly
+sm
+small
+smaller
+smallest
+sn
+so
+some
+somebody
+someday
+somehow
+someone
+somethan
+something
+sometime
+sometimes
+somewhat
+somewhere
+soon
+sorry
+specifically
+specified
+specify
+specifying
+sr
+st
+state
+states
+still
+stop
+strongly
+su
+sub
+substantially
+successfully
+such
+sufficiently
+suggest
+sup
+sure
+sv
+sy
+system
+sz
+t
+t's
+take
+taken
+taking
+tc
+td
+tell
+ten
+tends
+test
+text
+tf
+tg
+th
+than
+thank
+thanks
+thanx
+that
+that'll
+that's
+that've
+thatll
+thats
+thatve
+the
+their
+theirs
+them
+themselves
+then
+thence
+there
+there'd
+there'll
+there're
+there's
+there've
+thereafter
+thereby
+thered
+therefore
+therein
+therell
+thereof
+therere
+theres
+thereto
+thereupon
+thereve
+these
+they
+they'd
+they'll
+they're
+they've
+theyd
+theyll
+theyre
+theyve
+thick
+thin
+thing
+things
+think
+thinks
+third
+thirty
+this
+thorough
+thoroughly
+those
+thou
+though
+thoughh
+thought
+thoughts
+thousand
+three
+throug
+through
+throughout
+thru
+thus
+til
+till
+tip
+tis
+tj
+tk
+tm
+tn
+to
+today
+together
+too
+took
+top
+toward
+towards
+tp
+tr
+tried
+tries
+trillion
+truly
+try
+trying
+ts
+tt
+turn
+turned
+turning
+turns
+tv
+tw
+twas
+twelve
+twenty
+twice
+two
+tz
+u
+ua
+ug
+uk
+um
+un
+under
+underneath
+undoing
+unfortunately
+unless
+unlike
+unlikely
+until
+unto
+up
+upon
+ups
+upwards
+us
+use
+used
+useful
+usefully
+usefulness
+uses
+using
+usually
+uucp
+uy
+uz
+v
+va
+value
+various
+vc
+ve
+versus
+very
+vg
+vi
+via
+viz
+vn
+vol
+vols
+vs
+vu
+w
+want
+wanted
+wanting
+wants
+was
+wasn
+wasn't
+wasnt
+way
+ways
+we
+we'd
+we'll
+we're
+we've
+web
+webpage
+website
+wed
+welcome
+well
+wells
+went
+were
+weren
+weren't
+werent
+weve
+wf
+what
+what'd
+what'll
+what's
+what've
+whatever
+whatll
+whats
+whatve
+when
+when'd
+when'll
+when's
+whence
+whenever
+where
+where'd
+where'll
+where's
+whereafter
+whereas
+whereby
+wherein
+wheres
+whereupon
+wherever
+whether
+which
+whichever
+while
+whilst
+whim
+whither
+who
+who'd
+who'll
+who's
+whod
+whoever
+whole
+wholl
+whom
+whomever
+whos
+whose
+why
+why'd
+why'll
+why's
+widely
+width
+will
+willing
+wish
+with
+within
+without
+won
+won't
+wonder
+wont
+words
+work
+worked
+working
+works
+world
+would
+would've
+wouldn
+wouldn't
+wouldnt
+ws
+www
+x
+y
+ye
+year
+years
+yes
+yet
+you
+you'd
+you'll
+you're
+you've
+youd
+youll
+young
+younger
+youngest
+your
+youre
+yours
+yourself
+yourselves
+youve
+yt
+yu
+z
+za
+zero
+zm
+zr
diff --git a/modules/story-summary/vector/utils/stopwords-data/stopwords-iso.ja.txt b/modules/story-summary/vector/utils/stopwords-data/stopwords-iso.ja.txt
new file mode 100644
index 0000000..0e74864
--- /dev/null
+++ b/modules/story-summary/vector/utils/stopwords-data/stopwords-iso.ja.txt
@@ -0,0 +1,134 @@
+あそこ
+あっ
+あの
+あのかた
+あの人
+あり
+あります
+ある
+あれ
+い
+いう
+います
+いる
+う
+うち
+え
+お
+および
+おり
+おります
+か
+かつて
+から
+が
+き
+ここ
+こちら
+こと
+この
+これ
+これら
+さ
+さらに
+し
+しかし
+する
+ず
+せ
+せる
+そこ
+そして
+その
+その他
+その後
+それ
+それぞれ
+それで
+た
+ただし
+たち
+ため
+たり
+だ
+だっ
+だれ
+つ
+て
+で
+でき
+できる
+です
+では
+でも
+と
+という
+といった
+とき
+ところ
+として
+とともに
+とも
+と共に
+どこ
+どの
+な
+ない
+なお
+なかっ
+ながら
+なく
+なっ
+など
+なに
+なら
+なり
+なる
+なん
+に
+において
+における
+について
+にて
+によって
+により
+による
+に対して
+に対する
+に関する
+の
+ので
+のみ
+は
+ば
+へ
+ほか
+ほとんど
+ほど
+ます
+また
+または
+まで
+も
+もの
+ものの
+や
+よう
+より
+ら
+られ
+られる
+れ
+れる
+を
+ん
+何
+及び
+彼
+彼女
+我々
+特に
+私
+私達
+貴方
+貴方方
diff --git a/modules/story-summary/vector/utils/stopwords-data/stopwords-iso.zh.txt b/modules/story-summary/vector/utils/stopwords-data/stopwords-iso.zh.txt
new file mode 100644
index 0000000..15dea1c
--- /dev/null
+++ b/modules/story-summary/vector/utils/stopwords-data/stopwords-iso.zh.txt
@@ -0,0 +1,794 @@
+、
+。
+〈
+〉
+《
+》
+一
+一个
+一些
+一何
+一切
+一则
+一方面
+一旦
+一来
+一样
+一种
+一般
+一转眼
+七
+万一
+三
+上
+上下
+下
+不
+不仅
+不但
+不光
+不单
+不只
+不外乎
+不如
+不妨
+不尽
+不尽然
+不得
+不怕
+不惟
+不成
+不拘
+不料
+不是
+不比
+不然
+不特
+不独
+不管
+不至于
+不若
+不论
+不过
+不问
+与
+与其
+与其说
+与否
+与此同时
+且
+且不说
+且说
+两者
+个
+个别
+中
+临
+为
+为了
+为什么
+为何
+为止
+为此
+为着
+乃
+乃至
+乃至于
+么
+之
+之一
+之所以
+之类
+乌乎
+乎
+乘
+九
+也
+也好
+也罢
+了
+二
+二来
+于
+于是
+于是乎
+云云
+云尔
+五
+些
+亦
+人
+人们
+人家
+什
+什么
+什么样
+今
+介于
+仍
+仍旧
+从
+从此
+从而
+他
+他人
+他们
+他们们
+以
+以上
+以为
+以便
+以免
+以及
+以故
+以期
+以来
+以至
+以至于
+以致
+们
+任
+任何
+任凭
+会
+似的
+但
+但凡
+但是
+何
+何以
+何况
+何处
+何时
+余外
+作为
+你
+你们
+使
+使得
+例如
+依
+依据
+依照
+便于
+俺
+俺们
+倘
+倘使
+倘或
+倘然
+倘若
+借
+借傥然
+假使
+假如
+假若
+做
+像
+儿
+先不先
+光
+光是
+全体
+全部
+八
+六
+兮
+共
+关于
+关于具体地说
+其
+其一
+其中
+其二
+其他
+其余
+其它
+其次
+具体地说
+具体说来
+兼之
+内
+再
+再其次
+再则
+再有
+再者
+再者说
+再说
+冒
+冲
+况且
+几
+几时
+凡
+凡是
+凭
+凭借
+出于
+出来
+分
+分别
+则
+则甚
+别
+别人
+别处
+别是
+别的
+别管
+别说
+到
+前后
+前此
+前者
+加之
+加以
+区
+即
+即令
+即使
+即便
+即如
+即或
+即若
+却
+去
+又
+又及
+及
+及其
+及至
+反之
+反而
+反过来
+反过来说
+受到
+另
+另一方面
+另外
+另悉
+只
+只当
+只怕
+只是
+只有
+只消
+只要
+只限
+叫
+叮咚
+可
+可以
+可是
+可见
+各
+各个
+各位
+各种
+各自
+同
+同时
+后
+后者
+向
+向使
+向着
+吓
+吗
+否则
+吧
+吧哒
+含
+吱
+呀
+呃
+呕
+呗
+呜
+呜呼
+呢
+呵
+呵呵
+呸
+呼哧
+咋
+和
+咚
+咦
+咧
+咱
+咱们
+咳
+哇
+哈
+哈哈
+哉
+哎
+哎呀
+哎哟
+哗
+哟
+哦
+哩
+哪
+哪个
+哪些
+哪儿
+哪天
+哪年
+哪怕
+哪样
+哪边
+哪里
+哼
+哼唷
+唉
+唯有
+啊
+啐
+啥
+啦
+啪达
+啷当
+喂
+喏
+喔唷
+喽
+嗡
+嗡嗡
+嗬
+嗯
+嗳
+嘎
+嘎登
+嘘
+嘛
+嘻
+嘿
+嘿嘿
+四
+因
+因为
+因了
+因此
+因着
+因而
+固然
+在
+在下
+在于
+地
+基于
+处在
+多
+多么
+多少
+大
+大家
+她
+她们
+好
+如
+如上
+如上所述
+如下
+如何
+如其
+如同
+如是
+如果
+如此
+如若
+始而
+孰料
+孰知
+宁
+宁可
+宁愿
+宁肯
+它
+它们
+对
+对于
+对待
+对方
+对比
+将
+小
+尔
+尔后
+尔尔
+尚且
+就
+就是
+就是了
+就是说
+就算
+就要
+尽
+尽管
+尽管如此
+岂但
+己
+已
+已矣
+巴
+巴巴
+年
+并
+并且
+庶乎
+庶几
+开外
+开始
+归
+归齐
+当
+当地
+当然
+当着
+彼
+彼时
+彼此
+往
+待
+很
+得
+得了
+怎
+怎么
+怎么办
+怎么样
+怎奈
+怎样
+总之
+总的来看
+总的来说
+总的说来
+总而言之
+恰恰相反
+您
+惟其
+慢说
+我
+我们
+或
+或则
+或是
+或曰
+或者
+截至
+所
+所以
+所在
+所幸
+所有
+才
+才能
+打
+打从
+把
+抑或
+拿
+按
+按照
+换句话说
+换言之
+据
+据此
+接着
+故
+故此
+故而
+旁人
+无
+无宁
+无论
+既
+既往
+既是
+既然
+日
+时
+时候
+是
+是以
+是的
+更
+曾
+替
+替代
+最
+月
+有
+有些
+有关
+有及
+有时
+有的
+望
+朝
+朝着
+本
+本人
+本地
+本着
+本身
+来
+来着
+来自
+来说
+极了
+果然
+果真
+某
+某个
+某些
+某某
+根据
+欤
+正值
+正如
+正巧
+正是
+此
+此地
+此处
+此外
+此时
+此次
+此间
+毋宁
+每
+每当
+比
+比及
+比如
+比方
+没奈何
+沿
+沿着
+漫说
+点
+焉
+然则
+然后
+然而
+照
+照着
+犹且
+犹自
+甚且
+甚么
+甚或
+甚而
+甚至
+甚至于
+用
+用来
+由
+由于
+由是
+由此
+由此可见
+的
+的确
+的话
+直到
+相对而言
+省得
+看
+眨眼
+着
+着呢
+矣
+矣乎
+矣哉
+离
+秒
+称
+竟而
+第
+等
+等到
+等等
+简言之
+管
+类如
+紧接着
+纵
+纵令
+纵使
+纵然
+经
+经过
+结果
+给
+继之
+继后
+继而
+综上所述
+罢了
+者
+而
+而且
+而况
+而后
+而外
+而已
+而是
+而言
+能
+能否
+腾
+自
+自个儿
+自从
+自各儿
+自后
+自家
+自己
+自打
+自身
+至
+至于
+至今
+至若
+致
+般的
+若
+若夫
+若是
+若果
+若非
+莫不然
+莫如
+莫若
+虽
+虽则
+虽然
+虽说
+被
+要
+要不
+要不是
+要不然
+要么
+要是
+譬喻
+譬如
+让
+许多
+论
+设使
+设或
+设若
+诚如
+诚然
+该
+说
+说来
+请
+诸
+诸位
+诸如
+谁
+谁人
+谁料
+谁知
+贼死
+赖以
+赶
+起
+起见
+趁
+趁着
+越是
+距
+跟
+较
+较之
+边
+过
+还
+还是
+还有
+还要
+这
+这一来
+这个
+这么
+这么些
+这么样
+这么点儿
+这些
+这会儿
+这儿
+这就是说
+这时
+这样
+这次
+这般
+这边
+这里
+进而
+连
+连同
+逐步
+通过
+遵循
+遵照
+那
+那个
+那么
+那么些
+那么样
+那些
+那会儿
+那儿
+那时
+那样
+那般
+那边
+那里
+都
+鄙人
+鉴于
+针对
+阿
+除
+除了
+除外
+除开
+除此之外
+除非
+随
+随后
+随时
+随着
+难道说
+零
+非
+非但
+非徒
+非特
+非独
+靠
+顺
+顺着
+首先
+︿
+！
+＃
+＄
+％
+＆
+（
+）
+＊
+＋
+，
+０
+１
+２
+３
+４
+５
+６
+７
+８
+９
+：
+；
+＜
+＞
+？
+＠
+［
+］
+｛
+｜
+｝
+～
+￥
diff --git a/modules/story-summary/vector/utils/stopwords-patch.js b/modules/story-summary/vector/utils/stopwords-patch.js
new file mode 100644
index 0000000..51f7614
--- /dev/null
+++ b/modules/story-summary/vector/utils/stopwords-patch.js
@@ -0,0 +1,9 @@
+// Small domain-level tuning surface.
+// Keep this file tiny: add/remove only words that are repeatedly noisy in real logs.
+
+// Extra stopwords on top of BASE_STOP_WORDS.
+export const DOMAIN_STOP_WORDS = [];
+
+// High-value words that must never be filtered as stopwords.
+// Default to empty for plugin-wide deployment; entity names are already protected dynamically.
+export const KEEP_WORDS = [];
diff --git a/modules/story-summary/vector/utils/tokenizer.js b/modules/story-summary/vector/utils/tokenizer.js
index a39e4e9..37ab59c 100644
--- a/modules/story-summary/vector/utils/tokenizer.js
+++ b/modules/story-summary/vector/utils/tokenizer.js
@@ -18,6 +18,8 @@
 
 import { extensionFolderPath } from '../../../../core/constants.js';
 import { xbLog } from '../../../../core/debug-core.js';
+import { BASE_STOP_WORDS } from './stopwords-base.js';
+import { DOMAIN_STOP_WORDS, KEEP_WORDS } from './stopwords-patch.js';
 
 const MODULE_ID = 'tokenizer';
 
@@ -61,44 +63,30 @@ let entityList = [];
 
 /** @type {Set<string>} 已注入结巴的实体（避免重复 add_word） */
 let injectedEntities = new Set();
+let entityKeepSet = new Set();
 
 // ═══════════════════════════════════════════════════════════════════════════
 // 停用词
 // ═══════════════════════════════════════════════════════════════════════════
 
-const STOP_WORDS = new Set([
-    // 中文高频虚词
-    '的', '了', '在', '是', '我', '有', '和', '就', '不', '人',
-    '都', '一', '一个', '上', '也', '很', '到', '说', '要', '去',
-    '你', '会', '着', '没有', '看', '好', '自己', '这', '他', '她',
-    '它', '吗', '什么', '那', '里', '来', '吧', '呢', '啊', '哦',
-    '嗯', '呀', '哈', '嘿', '喂', '哎', '唉', '哇', '呃', '嘛',
-    '把', '被', '让', '给', '从', '向', '对', '跟', '比', '但',
-    '而', '或', '如果', '因为', '所以', '虽然', '但是', '然后',
-    '可以', '这样', '那样', '怎么', '为什么', '什么样', '哪里',
-    '时候', '现在', '已经', '还是', '只是', '可能', '应该', '知道',
-    '觉得', '开始', '一下', '一些', '这个', '那个', '他们', '我们',
-    '你们', '自己', '起来', '出来', '进去', '回来', '过来', '下去',
-    // 日语常见虚词（≥2字，匹配 TinySegmenter 产出粒度）
-    'です', 'ます', 'した', 'して', 'する', 'ない', 'いる', 'ある',
-    'なる', 'れる', 'られ', 'られる',
-    'この', 'その', 'あの', 'どの', 'ここ', 'そこ', 'あそこ',
-    'これ', 'それ', 'あれ', 'どれ',
-    'ても', 'から', 'まで', 'ので', 'のに', 'けど', 'だけ',
-    'もう', 'まだ', 'とても', 'ちょっと', 'やっぱり',
-    // 英文常见停用词
-    'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been',
-    'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will',
-    'would', 'could', 'should', 'may', 'might', 'can', 'shall',
-    'and', 'but', 'or', 'not', 'no', 'nor', 'so', 'yet',
-    'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from',
-    'it', 'its', 'he', 'she', 'his', 'her', 'they', 'them',
-    'this', 'that', 'these', 'those', 'i', 'me', 'my', 'you', 'your',
-    'we', 'our', 'if', 'then', 'than', 'when', 'what', 'which',
-    'who', 'how', 'where', 'there', 'here', 'all', 'each', 'every',
-    'both', 'few', 'more', 'most', 'other', 'some', 'such',
-    'only', 'own', 'same', 'just', 'very', 'also', 'about',
-]);
+const STATIC_KEEP_WORDS = new Set((KEEP_WORDS || [])
+    .map(w => String(w || '').trim().toLowerCase())
+    .filter(Boolean));
+
+// Standard source only: stopwords-iso snapshot + small domain patch.
+const EFFECTIVE_STOP_WORDS = new Set(
+    [...BASE_STOP_WORDS, ...DOMAIN_STOP_WORDS]
+        .map(w => String(w || '').trim().toLowerCase())
+        .filter(Boolean),
+);
+
+function shouldKeepTokenByWhitelist(token) {
+    const t = String(token || '').trim().toLowerCase();
+    if (!t) return false;
+    if (STATIC_KEEP_WORDS.has(t)) return true;
+    if (entityKeepSet.has(t)) return true;
+    return false;
+}
 
 // ═══════════════════════════════════════════════════════════════════════════
 // Unicode 分类
@@ -571,6 +559,7 @@ export function getState() {
 export function injectEntities(lexicon, displayMap) {
     if (!lexicon?.size) {
         entityList = [];
+        entityKeepSet = new Set();
         return;
     }
 
@@ -586,6 +575,7 @@ export function injectEntities(lexicon, displayMap) {
     // 按长度降序（最长匹配优先）
     entities.sort((a, b) => b.length - a.length);
     entityList = entities;
+    entityKeepSet = new Set(entities.map(e => String(e || '').trim().toLowerCase()).filter(Boolean));
 
     // 如果结巴已就绪，注入自定义词
     if (wasmState === WasmState.READY && jiebaAddWord) {
@@ -656,7 +646,7 @@ export function tokenize(text) {
 
         if (!cleaned) continue;
         if (cleaned.length < 2) continue;
-        if (STOP_WORDS.has(cleaned)) continue;
+        if (EFFECTIVE_STOP_WORDS.has(cleaned) && !shouldKeepTokenByWhitelist(cleaned)) continue;
         if (seen.has(cleaned)) continue;
 
         // 过滤纯标点/特殊字符
@@ -728,7 +718,7 @@ export function tokenizeForIndex(text) {
         .map(t => t.trim().toLowerCase())
         .filter(t => {
             if (!t || t.length < 2) return false;
-            if (STOP_WORDS.has(t)) return false;
+            if (EFFECTIVE_STOP_WORDS.has(t) && !shouldKeepTokenByWhitelist(t)) return false;
             if (/^[\s\x00-\x1F\p{P}\p{S}]+$/u.test(t)) return false;
             return true;
         });
@@ -744,6 +734,7 @@ export function tokenizeForIndex(text) {
  */
 export function reset() {
     entityList = [];
+    entityKeepSet = new Set();
     injectedEntities.clear();
     // 不重置 WASM 状态（避免重复加载）
 }