feat(recall): clamp focus weight and adjust pending context window

2026-02-11 17:21:04 +08:00
parent 297cc03770
commit 816196a710
3 changed files with 544 additions and 327 deletions
--- a/modules/story-summary/vector/retrieval/query-builder.js
+++ b/modules/story-summary/vector/retrieval/query-builder.js
@@ -2,8 +2,18 @@
 // query-builder.js - 确定性查询构建器（无 LLM）
 //
 // 职责：
-// 1. 从最近消息 + 实体词典构建 QueryBundle_v0
-// 2. 用第一轮召回结果增强为 QueryBundle_v1
+// 1. 从最近 3 条消息构建 QueryBundle（加权向量段）
+// 2. 用第一轮召回结果产出 hints 段用于 R2 增强
+//
+// 加权向量设计：
+// - 每条消息独立 embed，得到独立向量
+// - 按位置分配基础权重（焦点 > 近上下文 > 远上下文）
+// - 短消息通过 lengthFactor 自动降权（下限 35%）
+// - recall.js 负责 embed + 归一化 + 加权平均
+//
+// 焦点确定：
+// - pendingUserMessage 存在 → 它是焦点
+// - 否则 → lastMessages 最后一条是焦点
 //
 // 不负责：向量化、检索、rerank
 // ═══════════════════════════════════════════════════════════════════════════
@@ -15,12 +25,30 @@ import { filterText } from '../utils/text-filter.js';
 import { tokenizeForIndex as tokenizerTokenizeForIndex } from '../utils/tokenizer.js';

 // ─────────────────────────────────────────────────────────────────────────
-// 常量
+// 权重常量
+// ─────────────────────────────────────────────────────────────────────────
+
+// R1 base weights, laid out as [...context(oldest→newest), focus]
+// Focus message 55%, nearest context 30%, earlier context 15%
+export const FOCUS_BASE_WEIGHT = 0.55;
+export const CONTEXT_BASE_WEIGHTS = [0.15, 0.30];
+
+// R2 base weights: the focus and context give up weight to the hints segment
+export const FOCUS_BASE_WEIGHT_R2 = 0.45;
+export const CONTEXT_BASE_WEIGHTS_R2 = [0.10, 0.20];
+export const HINTS_BASE_WEIGHT = 0.25;
+
+// Length penalty: below 50 characters the factor decays linearly, with a floor of 35%
+export const LENGTH_FULL_THRESHOLD = 50;
+export const LENGTH_MIN_FACTOR = 0.35;
+// Minimum share of the focus after normalization (per the author's note, enforced by recall.js after normalizing)
+// Intent: even a very short focus text must not be diluted to a negligible weight
+export const FOCUS_MIN_NORMALIZED_WEIGHT = 0.35;
+
+// ─────────────────────────────────────────────────────────────────────────
+// 其他常量
 // ─────────────────────────────────────────────────────────────────────────

-// Zero-darkbox policy:
-// - No internal truncation. We rely on model-side truncation / provider limits.
-// - If provider rejects due to length, we fail loudly and degrade explicitly.
 const MEMORY_HINT_ATOMS_MAX = 5;
 const MEMORY_HINT_EVENTS_MAX = 3;
 const LEXICAL_TERMS_MAX = 10;
@@ -41,14 +69,6 @@ function cleanMessageText(text) {
        .trim();
 }

-/**
- * 截断文本到指定长度
- * @param {string} text
- * @param {number} maxLen
- * @returns {string}
- */
-// truncate removed by design (zero-darkbox)
-
 /**
 * 清理事件摘要（移除楼层标记）
 * @param {string} summary
@@ -61,9 +81,23 @@ function cleanSummary(summary) {
 }

 /**
- * 从文本中提取高频实词（用于词法检索）
+ * Compute the length factor used to down-weight short messages.
 *
- * 使用统一分词器（结巴 + 实体保护 + 停用词过滤），按频率排序
+ * charCount >= 50 → 1.0
+ * charCount <= 0, NaN or non-numeric → 0.35 (never returns NaN)
+ * linear interpolation in between
+ *
+ * @param {number} charCount - character count of the cleaned content (speaker prefix excluded)
+ * @returns {number} 0.35 ~ 1.0
+ */
+export function computeLengthFactor(charCount) {
+    if (charCount >= LENGTH_FULL_THRESHOLD) return 1.0;
+    if (!(charCount > 0)) return LENGTH_MIN_FACTOR; // negated form also catches NaN / undefined
+    return LENGTH_MIN_FACTOR + (1.0 - LENGTH_MIN_FACTOR) * (charCount / LENGTH_FULL_THRESHOLD);
+}
+
+/**
+ * 从文本中提取高频实词（用于词法检索）
 *
 * @param {string} text - 清洗后的文本
 * @param {number} maxTerms - 最大词数
@@ -72,10 +106,7 @@ function cleanSummary(summary) {
 function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
    if (!text) return [];

-    // 使用统一分词器（索引用，不去重，保留词频）
    const tokens = tokenizerTokenizeForIndex(text);
-
-    // 统计词频
    const freq = new Map();
    for (const token of tokens) {
        const key = String(token || '').toLowerCase();
@@ -89,172 +120,203 @@ function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
        .map(([term]) => term);
 }

+// ─────────────────────────────────────────────────────────────────────────
+// 类型定义
+// ─────────────────────────────────────────────────────────────────────────
+
 /**
- * 构建 rerank 专用查询（纯自然语言，不带结构标签）
- *
- * reranker（bge-reranker-v2-m3）的 query 应为自然语言文本，
- * 不含 [ENTITIES] [DIALOGUE] 等结构标签。
- *
- * @param {string[]} focusEntities - 焦点实体
- * @param {object[]} lastMessages - 最近 K 条消息
- * @param {string|null} pendingUserMessage - 待发送的用户消息
- * @param {object} context - { name1, name2 }
- * @returns {string}
+ * @typedef {object} QuerySegment
+ * @property {string} text        - 待 embed 的文本（含 speaker 前缀，纯自然语言）
+ * @property {number} baseWeight  - R1 基础权重
+ * @property {number} charCount   - 内容字符数（不含 speaker 前缀，用于 lengthFactor）
 */
-function buildRerankQuery(focusEntities, lastMessages, pendingUserMessage, context) {
-    const parts = [];
-
-    // 实体提示
-    if (focusEntities.length > 0) {
-        parts.push(`关于${focusEntities.join('、')}。`);
-    }
-
-    // 最近对话原文
-    for (const m of (lastMessages || [])) {
-        const speaker = m.is_user ? (context.name1 || '用户') : (m.name || context.name2 || '角色');
-        const clean = cleanMessageText(m.mes || '');
-        if (clean) {
-            parts.push(`${speaker}：${clean}`);
-        }
-    }
-
-    // 待发送消息
-    if (pendingUserMessage) {
-        const clean = cleanMessageText(pendingUserMessage);
-        if (clean) {
-            parts.push(`${context.name1 || '用户'}：${clean}`);
-        }
-    }
-
-    return parts.join('\n');
-}
-
-// ─────────────────────────────────────────────────────────────────────────
-// QueryBundle 类型定义（JSDoc）
-// ─────────────────────────────────────────────────────────────────────────

 /**
 * @typedef {object} QueryBundle
- * @property {string[]} focusEntities   - 焦点实体（原词形，已排除 name1）
- * @property {string}   queryText_v0    - 第一轮查询文本
- * @property {string|null} queryText_v1 - 第二轮查询文本（refinement 后填充）
- * @property {string}   rerankQuery     - rerank 用的短查询
+ * @property {QuerySegment[]}    querySegments  - R1 向量段（上下文 oldest→newest，焦点在末尾）
+ * @property {QuerySegment|null} hintsSegment   - R2 hints 段（refinement 后填充）
+ * @property {string}   rerankQuery     - rerank 用的纯自然语言查询（焦点在前）
 * @property {string[]} lexicalTerms    - MiniSearch 查询词
- * @property {Set<string>} _lexicon     - 实体词典（内部使用）
+ * @property {string[]} focusEntities   - 焦点实体（原词形，已排除 name1）
+ * @property {Set<string>}       _lexicon     - 实体词典（内部使用）
 * @property {Map<string, string>} _displayMap - 标准化→原词形映射（内部使用）
 */

 // ─────────────────────────────────────────────────────────────────────────
-// 阶段 1：构建 QueryBundle_v0
+// 内部：消息条目构建
+// ─────────────────────────────────────────────────────────────────────────
+
+/**
+ * @typedef {object} MessageEntry
+ * @property {string} text      - speaker：内容（完整文本）
+ * @property {number} charCount - 内容字符数（不含 speaker 前缀）
+ */
+
+/**
+ * Clean a chat message and wrap it as a speaker-labelled entry.
+ * @param {object} message - chat message object (mes, is_user and name are read)
+ * @param {object} context - { name1, name2 }
+ * @returns {MessageEntry|null} null when there is no text before or after cleaning
+ */
+function buildMessageEntry(message, context) {
+    const raw = message?.mes;
+    if (!raw) return null;
+
+    // User turns are labelled with name1; other turns prefer the message's own name.
+    const label = message.is_user
+        ? (context.name1 || '用户')
+        : (message.name || context.name2 || '角色');
+
+    const body = cleanMessageText(raw);
+    if (!body) return null;
+
+    // charCount covers the cleaned content only, never the speaker label.
+    return { text: `${label}：${body}`, charCount: body.length };
+}
+
+// ─────────────────────────────────────────────────────────────────────────
+// 阶段 1：构建 QueryBundle
 // ─────────────────────────────────────────────────────────────────────────

 /**
 * 构建初始查询包
 *
- * @param {object[]} lastMessages - 最近 K=2 条消息
+ * Message layout (when K=3):
+ *   msg[0] = USER(#N-2)  context   baseWeight = 0.15
+ *   msg[1] = AI(#N-1)    context   baseWeight = 0.30
+ *   msg[2] = USER(#N)    focus     baseWeight = 0.55
+ *
+ * Focus selection:
+ *   pendingUserMessage present → it is the focus; every lastMessages entry is context
+ *   pendingUserMessage absent  → the last lastMessages entry is the focus; the rest are context
+ *
+ * @param {object[]} lastMessages - the most recent K messages (passed in by recall.js)
 * @param {string|null} pendingUserMessage - 用户刚输入但未进 chat 的消息
- * @param {object|null} store - getSummaryStore() 返回值（可选，内部会自动获取）
- * @param {object|null} context - { name1, name2 }（可选，内部会自动获取）
+ * @param {object|null} store - summary store; falls back to getSummaryStore() when falsy
+ * @param {object|null} context - { name1, name2 }; falls back to the names from getContext() when falsy
 * @returns {QueryBundle}
 */
 export function buildQueryBundle(lastMessages, pendingUserMessage, store = null, context = null) {
-    // 自动获取 store 和 context
    if (!store) store = getSummaryStore();
    if (!context) {
        const ctx = getContext();
        context = { name1: ctx.name1, name2: ctx.name2 };
    }

-    // 1. 构建实体词典
+    // 1. Entity lexicon
    const lexicon = buildEntityLexicon(store, context);
    const displayMap = buildDisplayNameMap(store, context);

-    // 2. 清洗消息文本
-    const dialogueLines = [];
-    const allCleanText = [];
+    // 2. Split the inputs into the focus entry and the context entries
+    const contextEntries = [];
+    let focusEntry = null;
+    const allCleanTexts = [];

-    for (const m of (lastMessages || [])) {
-        const speaker = m.is_user ? (context.name1 || '用户') : (m.name || context.name2 || '角色');
-        const clean = cleanMessageText(m.mes || '');
-
-        if (clean) {
-            // 不使用楼层号，embedding 模型不需要
-            // 不截断，零暗箱
-            dialogueLines.push(`${speaker}: ${clean}`);
-            allCleanText.push(clean);
-        }
-    }
-
-    // 3. 处理 pendingUserMessage
-    let pendingClean = '';
    if (pendingUserMessage) {
-        pendingClean = cleanMessageText(pendingUserMessage);
+        // pending is the focus and all lastMessages are context; if pending cleans to empty, no focus entry is set
+        const pendingClean = cleanMessageText(pendingUserMessage);
        if (pendingClean) {
-            allCleanText.push(pendingClean);
+            const speaker = context.name1 || '用户';
+            focusEntry = {
+                text: `${speaker}：${pendingClean}`,
+                charCount: pendingClean.length,
+            };
+            allCleanTexts.push(pendingClean);
+        }
+
+        for (const m of (lastMessages || [])) {
+            const entry = buildMessageEntry(m, context);
+            if (entry) {
+                contextEntries.push(entry);
+                allCleanTexts.push(cleanMessageText(m.mes));
+            }
+        }
+    } else {
+        // No pending message → the last element of lastMessages is the focus
+        const msgs = lastMessages || [];
+
+        if (msgs.length > 0) {
+            const lastMsg = msgs[msgs.length - 1];
+            const entry = buildMessageEntry(lastMsg, context);
+            if (entry) {
+                focusEntry = entry;
+                allCleanTexts.push(cleanMessageText(lastMsg.mes));
+            }
+        }
+
+        for (let i = 0; i < msgs.length - 1; i++) {
+            const entry = buildMessageEntry(msgs[i], context);
+            if (entry) {
+                contextEntries.push(entry);
+                allCleanTexts.push(cleanMessageText(msgs[i].mes));
+            }
        }
    }

-    // 4. 提取焦点实体
-    const combinedText = allCleanText.join(' ');
+    // 3. Extract focus entities from all cleaned texts joined together
+    const combinedText = allCleanTexts.join(' ');
    const focusEntities = extractEntitiesFromText(combinedText, lexicon, displayMap);

-    // 5. 构建 queryText_v0
-    const queryParts = [];
+    // 4. Build querySegments
+    //    Context first (oldest → newest), focus last
+    //    Context weights are assigned tail-aligned from CONTEXT_BASE_WEIGHTS
+    const querySegments = [];

-    if (focusEntities.length > 0) {
-        queryParts.push(`[ENTITIES]\n${focusEntities.join('\n')}`);
+    for (let i = 0; i < contextEntries.length; i++) {
+        const weightIdx = Math.max(0, CONTEXT_BASE_WEIGHTS.length - contextEntries.length + i);
+        querySegments.push({
+            text: contextEntries[i].text,
+            baseWeight: CONTEXT_BASE_WEIGHTS[weightIdx] || CONTEXT_BASE_WEIGHTS[0],
+            charCount: contextEntries[i].charCount,
+        });
    }

-    if (dialogueLines.length > 0) {
-        queryParts.push(`[DIALOGUE]\n${dialogueLines.join('\n')}`);
+    if (focusEntry) {
+        querySegments.push({
+            text: focusEntry.text,
+            baseWeight: FOCUS_BASE_WEIGHT,
+            charCount: focusEntry.charCount,
+        });
    }

-    if (pendingClean) {
-        // 不截断，零暗箱
-        queryParts.push(`[PENDING_USER]\n${pendingClean}`);
-    }
+    // 5. rerankQuery (focus first, plain natural language; each line keeps its speaker label)
+    const contextLines = contextEntries.map(e => e.text);
+    const rerankQuery = focusEntry
+        ? [focusEntry.text, ...contextLines].join('\n')
+        : contextLines.join('\n');

-    const queryText_v0 = queryParts.join('\n\n');
-
-    // 6. rerankQuery 独立构建（纯自然语言，供 reranker 使用）
-    const rerankQuery = buildRerankQuery(focusEntities, dialogueLines.length > 0 ? lastMessages : [], pendingUserMessage, context);
-
-    // 7. 构建 lexicalTerms
+    // 6. lexicalTerms (entities first, then frequent terms added until LEXICAL_TERMS_MAX is reached)
    const entityTerms = focusEntities.map(e => e.toLowerCase());
    const textTerms = extractKeyTerms(combinedText);
-
-    // 合并去重：实体优先
    const termSet = new Set(entityTerms);
    for (const t of textTerms) {
        if (termSet.size >= LEXICAL_TERMS_MAX) break;
        termSet.add(t);
    }

-    const lexicalTerms = Array.from(termSet);
-
    return {
-        focusEntities,
-        queryText_v0,
-        queryText_v1: null,
+        querySegments,
+        hintsSegment: null,
        rerankQuery,
-        lexicalTerms,
+        lexicalTerms: Array.from(termSet),
+        focusEntities,
        _lexicon: lexicon,
        _displayMap: displayMap,
    };
 }

 // ─────────────────────────────────────────────────────────────────────────
-// 阶段 3：Query Refinement（用第一轮召回结果增强）
+// 阶段 3：Query Refinement（用第一轮召回结果产出 hints 段）
 // ─────────────────────────────────────────────────────────────────────────

 /**
 * 用第一轮召回结果增强 QueryBundle
 *
 * 原地修改 bundle：
- * - queryText_v1 = queryText_v0 + [MEMORY_HINTS]
- * - focusEntities 可能扩展（从 anchorHits 的 subject/object 中补充）
- * - rerankQuery 追加 memory hints 关键词
- * - lexicalTerms 追加 memory hints 关键词
+ * - hintsSegment：填充 hints 段（供 R2 加权使用）
+ * - focusEntities：可能从 anchor hits 的 subject/object 中扩展
+ * - lexicalTerms：可能追加 hints 中的关键词
+ * - rerankQuery：不变（保持焦点优先的纯自然语言）
 *
 * @param {QueryBundle} bundle - 原始查询包
 * @param {object[]} anchorHits - 第一轮 L0 命中（按相似度降序）
@@ -267,10 +329,7 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
    const topAnchors = (anchorHits || []).slice(0, MEMORY_HINT_ATOMS_MAX);
    for (const hit of topAnchors) {
        const semantic = hit.atom?.semantic || '';
-        if (semantic) {
-            // 不截断，零暗箱
-            hints.push(semantic);
-        }
+        if (semantic) hints.push(semantic);
    }

    // 2. 从 top eventHits 提取 memory hints
@@ -282,18 +341,19 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
        const line = title && summary
            ? `${title}: ${summary}`
            : title || summary;
-        if (line) {
-            // 不截断，零暗箱
-            hints.push(line);
-        }
+        if (line) hints.push(line);
    }

-    // 3. 构建 queryText_v1（Hints 前置，最优先）
+    // 3. 构建 hintsSegment
    if (hints.length > 0) {
-        const hintText = `[MEMORY_HINTS]\n${hints.join('\n')}`;
-        bundle.queryText_v1 = hintText + `\n\n` + bundle.queryText_v0;
+        const hintsText = hints.join('\n');
+        bundle.hintsSegment = {
+            text: hintsText,
+            baseWeight: HINTS_BASE_WEIGHT,
+            charCount: hintsText.length,
+        };
    } else {
-        bundle.queryText_v1 = bundle.queryText_v0;
+        bundle.hintsSegment = null;
    }

    // 4. 从 anchorHits 补充 focusEntities
@@ -307,10 +367,13 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
            const atom = hit.atom;
            if (!atom) continue;

-            // 检查 subject 和 object
            for (const field of [atom.subject, atom.object]) {
                if (!field) continue;
-                const norm = String(field).normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim().toLowerCase();
+                const norm = String(field)
+                    .normalize('NFKC')
+                    .replace(/[\u200B-\u200D\uFEFF]/g, '')
+                    .trim()
+                    .toLowerCase();
                if (norm.length >= 2 && lexicon.has(norm) && !existingSet.has(norm)) {
                    existingSet.add(norm);
                    const display = displayMap?.get(norm) || field;
@@ -320,8 +383,9 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
        }
    }

-    // 5. rerankQuery 保持独立（不随 refinement 变更）
-    // reranker 需要纯自然语言 query，不受 memory hints 干扰
+    // 5. rerankQuery 不变
+    //    cross-encoder 接收纯自然语言 query，不受 hints 干扰
+    //    焦点消息始终在前，保证 reranker 内部截断时保留最关键内容

    // 6. 增强 lexicalTerms
    if (hints.length > 0) {