Refine tokenizer preload and metrics ratio

This commit is contained in:
2026-02-09 20:58:48 +08:00
parent 0a28539b29
commit 36ba66a523
2 changed files with 27 additions and 17 deletions

View File

@@ -45,7 +45,7 @@ import { runSummaryGeneration } from "./generate/generator.js";
import { embed, getEngineFingerprint, testOnlineService } from "./vector/utils/embedder.js";
// tokenizer
import { preload as preloadTokenizer, injectEntities } from "./vector/utils/tokenizer.js";
import { preload as preloadTokenizer, injectEntities, isReady as isTokenizerReady } from "./vector/utils/tokenizer.js";
// entity lexicon
import { buildEntityLexicon, buildDisplayNameMap } from "./vector/retrieval/entity-lexicon.js";
@@ -152,14 +152,11 @@ const EXT_PROMPT_KEY = "LittleWhiteBox_StorySummary";
const MIN_INJECTION_DEPTH = 2;
// ═══════════════════════════════════════════════════════════════════════════
// 分词器预热
// 分词器预热(依赖 tokenizer.js 内部状态机,支持失败重试)
// ═══════════════════════════════════════════════════════════════════════════
/** 是否已触发过预热 */
let tokenizerPreloaded = false;
function maybePreloadTokenizer() {
if (tokenizerPreloaded) return;
if (isTokenizerReady()) return;
const vectorCfg = getVectorConfig();
if (!vectorCfg?.enabled) return;
@@ -167,11 +164,12 @@ function maybePreloadTokenizer() {
tokenizerPreloaded = true;
preloadTokenizer()
.then((ok) => {
if (ok) tokenizerPreloaded = true;
if (ok) {
xbLog.info(MODULE_ID, "分词器预热成功");
}
})
.catch((e) => {
// 不置 tokenizerPreloaded,允许后续重试(例如用户修复路径/刷新后)
xbLog.warn(MODULE_ID, "分词器预热失败(将降级运行,可稍后重试)", e);
xbLog.warn(MODULE_ID, "分词器预热失败(将降级运行,可稍后重试)", e);
});
}
@@ -1048,7 +1046,6 @@ function handleFrameMessage(event) {
case "VECTOR_GENERATE":
if (data.config) saveVectorConfig(data.config);
// 向量配置变更,可能刚启用,触发预热
maybePreloadTokenizer();
refreshEntityLexiconAndWarmup();
handleGenerateVectors(data.config);
@@ -1139,7 +1136,6 @@ function handleFrameMessage(event) {
case "REQUEST_VECTOR_STATS":
sendVectorStatsToFrame();
// 向量开关可能在 iframe 中被修改,检查是否需要预热
maybePreloadTokenizer();
break;
@@ -1378,6 +1374,15 @@ async function handleGenerationStarted(type, _params, isDryRun) {
clearExtensionPrompt();
// ★ 最后一道关卡:向量启用时,同步等待分词器就绪
if (vectorCfg?.enabled && !isTokenizerReady()) {
try {
await preloadTokenizer();
} catch (e) {
xbLog.warn(MODULE_ID, "生成前分词器预热失败,将使用降级分词", e);
}
}
// 判断是否使用缓存的用户消息(30秒内有效)
let pendingUserMessage = null;
if (type === "normal" && lastSentUserMessage && (Date.now() - lastSentTimestamp < 30000)) {
@@ -1521,6 +1526,5 @@ jQuery(() => {
registerEvents();
initStateIntegration();
// 条件预热分词器(storySummary 已启用,检查 vector 是否也启用)
maybePreloadTokenizer();
});

View File

@@ -430,12 +430,18 @@ export function detectIssues(metrics) {
// ─────────────────────────────────────────────────────────────────
if (m.event.considered > 0) {
const selectRatio = m.event.selected / m.event.considered;
if (selectRatio < 0.1) {
issues.push(`Event selection ratio too low (${(selectRatio * 100).toFixed(1)}%) - threshold may be too high`);
// 只统计 Dense 路选中(direct + related),Lexical 是额外补充,不计入
const denseSelected =
(m.event.byRecallType?.direct || 0) +
(m.event.byRecallType?.related || 0);
const denseSelectRatio = denseSelected / m.event.considered;
if (denseSelectRatio < 0.1) {
issues.push(`Dense event selection ratio too low (${(denseSelectRatio * 100).toFixed(1)}%) - threshold may be too high`);
}
if (selectRatio > 0.6 && m.event.considered > 10) {
issues.push(`Event selection ratio high (${(selectRatio * 100).toFixed(1)}%) - may include noise`);
if (denseSelectRatio > 0.6 && m.event.considered > 10) {
issues.push(`Dense event selection ratio high (${(denseSelectRatio * 100).toFixed(1)}%) - may include noise`);
}
}