feat(recall): clamp focus weight and adjust pending context window

2026-02-11 17:21:04 +08:00
parent 297cc03770
commit 816196a710
3 changed files with 544 additions and 327 deletions
--- a/modules/story-summary/vector/retrieval/metrics.js
+++ b/modules/story-summary/vector/retrieval/metrics.js
@@ -1,16 +1,12 @@
 // ═══════════════════════════════════════════════════════════════════════════
-// Story Summary - Metrics Collector (v4 - Two-Stage: L0 Locate → L1 Evidence)
+// Story Summary - Metrics Collector (v5 - Weighted Query + Floor Aggregation)
 //
-// 命名规范：
-// - 存储层用 L0/L1/L2/L3（StateAtom/Chunk/Event/Fact）
-// - 指标层用语义名称：anchor/evidence/event/constraint/arc
-//
-// 架构变更（v3 → v4）：
-// - evidence 区块反映 L0-only 融合 + L1 按楼层拉取的两阶段架构
-// - 删除 mergedByType / selectedByType（不再有混合池）
-// - 新增 floorCandidates / floorsSelected / l0Collected / l1Pulled / l1Attached / l1CosineTime
-// - fusion 区块明确标注 L0-only（删除 anchorCount）
-// - quality.chunkRealRatio → quality.l1AttachRate
+// v4 → v5 变更：
+// - query: 新增 segmentWeights / r2Weights（加权向量诊断）
+// - fusion: 新增 denseAggMethod / lexDensityBonus（聚合策略可观测）
+// - quality: 新增 rerankRetentionRate（粗排-精排一致性）
+// - 移除 timing 中从未写入的死字段（queryBuild/queryRefine/lexicalSearch/fusion）
+// - 移除从未写入的 arc 区块
 // ═══════════════════════════════════════════════════════════════════════════

 /**
@@ -25,9 +21,11 @@ export function createMetrics() {
            refineTime: 0,
            lengths: {
                v0Chars: 0,
-                v1Chars: null,     // null = NA
+                v1Chars: null,     // null = 无 hints
                rerankChars: 0,
            },
+            segmentWeights: [],    // R1 归一化后权重 [context..., focus]
+            r2Weights: null,       // R2 归一化后权重 [context..., focus, hints]（null = 无 hints）
        },

        // Anchor (L0 StateAtoms) - 语义锚点
@@ -55,6 +53,8 @@ export function createMetrics() {
            totalUnique: 0,
            afterCap: 0,
            time: 0,
+            denseAggMethod: '',    // 聚合方法描述（如 "max×0.6+mean×0.4"）
+            lexDensityBonus: 0,    // 密度加成系数
        },

        // Constraint (L3 Facts) - 世界约束
@@ -83,34 +83,28 @@ export function createMetrics() {
        // Evidence (Two-Stage: Floor rerank → L1 pull) - 原文证据
        evidence: {
            // Stage 1: Floor
-            floorCandidates: 0,    // W-RRF 融合后的 floor 候选数
-            floorsSelected: 0,     // rerank 后选中的 floor 数
-            l0Collected: 0,        // 选中 floor 中收集的 L0 atom 总数
+            floorCandidates: 0,
+            floorsSelected: 0,
+            l0Collected: 0,
            rerankApplied: false,
            rerankFailed: false,
            beforeRerank: 0,
            afterRerank: 0,
            rerankTime: 0,
            rerankScores: null,
-            rerankDocAvgLength: 0, // rerank document 平均字符数
+            rerankDocAvgLength: 0,

            // Stage 2: L1
-            l1Pulled: 0,           // 从 DB 拉取的 L1 chunk 总数
-            l1Attached: 0,         // 实际挂载的 L1 数（top-1 × floor × 2侧）
-            l1CosineTime: 0,       // L1 cosine 打分耗时
+            l1Pulled: 0,
+            l1Attached: 0,
+            l1CosineTime: 0,

            // 装配
-            contextPairsAdded: 0,  // USER 侧挂载数量
+            contextPairsAdded: 0,
            tokens: 0,
            assemblyTime: 0,
        },

-        // Arc - 人物弧光
-        arc: {
-            injected: 0,
-            tokens: 0,
-        },
-
        // Formatting - 格式化
        formatting: {
            sectionsIncluded: [],
@@ -131,13 +125,9 @@ export function createMetrics() {
            },
        },

-        // Timing - 计时
+        // Timing - 计时（仅包含实际写入的字段）
        timing: {
-            queryBuild: 0,
-            queryRefine: 0,
            anchorSearch: 0,
-            lexicalSearch: 0,
-            fusion: 0,
            constraintFilter: 0,
            eventRetrieval: 0,
            evidenceRetrieval: 0,
@@ -151,7 +141,8 @@ export function createMetrics() {
        quality: {
            constraintCoverage: 100,
            eventPrecisionProxy: 0,
-            l1AttachRate: 0,       // 有 L1 挂载的 floor 占比
+            l1AttachRate: 0,
+            rerankRetentionRate: 0,
            potentialIssues: [],
        },
    };
@@ -178,6 +169,16 @@ export function calcSimilarityStats(similarities) {
    };
 }

+/**
+ * Render weights as a compact bracketed list (numbers: 3 decimals; anything else: String()).
+ * @param {number[]|null} weights - null/undefined/empty yields 'N/A'
+ * @returns {string} e.g. "[0.150, 0.300, 0.550]"
+ */
+function fmtWeights(weights) {
+    const cells = weights?.length ? weights.map((w) => (typeof w === 'number' ? w.toFixed(3) : String(w))) : null;
+    return cells ? `[${cells.join(', ')}]` : 'N/A';
+}
+
 /**
 * 格式化指标为可读日志
 * @param {object} metrics
@@ -189,21 +190,27 @@ export function formatMetricsLog(metrics) {

    lines.push('');
    lines.push('════════════════════════════════════════');
-    lines.push('        Recall Metrics Report (v4)      ');
+    lines.push('        Recall Metrics Report (v5)      ');
    lines.push('════════════════════════════════════════');
    lines.push('');

    // Query Length
    lines.push('[Query Length] 查询长度');
    lines.push(`├─ query_v0_chars: ${m.query?.lengths?.v0Chars ?? 0}`);
-    lines.push(`├─ query_v1_chars: ${m.query?.lengths?.v1Chars == null ? 'NA' : m.query.lengths.v1Chars}`);
+    lines.push(`├─ query_v1_chars: ${m.query?.lengths?.v1Chars == null ? 'N/A' : m.query.lengths.v1Chars}`);
    lines.push(`└─ rerank_query_chars: ${m.query?.lengths?.rerankChars ?? 0}`);
    lines.push('');

    // Query Build
    lines.push('[Query] 查询构建');
    lines.push(`├─ build_time: ${m.query.buildTime}ms`);
-    lines.push(`└─ refine_time: ${m.query.refineTime}ms`);
+    lines.push(`├─ refine_time: ${m.query.refineTime}ms`);
+    lines.push(`├─ r1_weights: ${fmtWeights(m.query.segmentWeights)}`);
+    if (m.query.r2Weights) {
+        lines.push(`└─ r2_weights: ${fmtWeights(m.query.r2Weights)}`);
+    } else {
+        lines.push(`└─ r2_weights: N/A (no hints)`);
+    }
    lines.push('');

    // Anchor (L0 StateAtoms)
@@ -228,7 +235,13 @@ export function formatMetricsLog(metrics) {
    // Fusion (W-RRF, floor-level)
    lines.push('[Fusion] W-RRF (floor-level) - 多路融合');
    lines.push(`├─ dense_floors: ${m.fusion.denseFloors}`);
+    if (m.fusion.denseAggMethod) {
+        lines.push(`│   └─ aggregation: ${m.fusion.denseAggMethod}`);
+    }
    lines.push(`├─ lex_floors: ${m.fusion.lexFloors}`);
+    if (m.fusion.lexDensityBonus > 0) {
+        lines.push(`│   └─ density_bonus: ${m.fusion.lexDensityBonus}`);
+    }
    lines.push(`├─ total_unique: ${m.fusion.totalUnique}`);
    lines.push(`├─ after_cap: ${m.fusion.afterCap}`);
    lines.push(`└─ time: ${m.fusion.time}ms`);
@@ -313,14 +326,6 @@ export function formatMetricsLog(metrics) {
    lines.push(`└─ assembly_time: ${m.evidence.assemblyTime}ms`);
    lines.push('');

-    // Arc
-    if (m.arc.injected > 0) {
-        lines.push('[Arc] 人物弧光');
-        lines.push(`├─ injected: ${m.arc.injected}`);
-        lines.push(`└─ tokens: ${m.arc.tokens}`);
-        lines.push('');
-    }
-
    // Formatting
    lines.push('[Formatting] 格式化');
    lines.push(`├─ sections: [${(m.formatting.sectionsIncluded || []).join(', ')}]`);
@@ -363,6 +368,7 @@ export function formatMetricsLog(metrics) {
    lines.push(`├─ constraint_coverage: ${m.quality.constraintCoverage}%`);
    lines.push(`├─ event_precision_proxy: ${m.quality.eventPrecisionProxy}`);
    lines.push(`├─ l1_attach_rate: ${m.quality.l1AttachRate}%`);
+    lines.push(`├─ rerank_retention_rate: ${m.quality.rerankRetentionRate}%`);

    if (m.quality.potentialIssues && m.quality.potentialIssues.length > 0) {
        lines.push(`└─ potential_issues:`);
@@ -398,6 +404,19 @@ export function detectIssues(metrics) {
        issues.push('No focus entities extracted - entity lexicon may be empty or messages too short');
    }

+    // 权重极端退化检测
+    const segWeights = m.query.segmentWeights || [];
+    if (segWeights.length > 0) {
+        const focusWeight = segWeights[segWeights.length - 1] || 0;
+        if (focusWeight < 0.15) {
+            issues.push(`Focus segment weight very low (${(focusWeight * 100).toFixed(0)}%) - focus message may be too short`);
+        }
+        const allLow = segWeights.every(w => w < 0.1);
+        if (allLow) {
+            issues.push('All segment weights below 10% - all messages may be extremely short');
+        }
+    }
+
    // ─────────────────────────────────────────────────────────────────
    // 锚点匹配问题
    // ─────────────────────────────────────────────────────────────────
@@ -494,6 +513,16 @@ export function detectIssues(metrics) {
        }
    }

+    // Rerank 保留率
+    const retentionRate = m.evidence.floorCandidates > 0
+        ? Math.round(m.evidence.floorsSelected / m.evidence.floorCandidates * 100)
+        : 0;
+    m.quality.rerankRetentionRate = retentionRate;
+
+    if (m.evidence.floorCandidates > 0 && retentionRate < 25) {
+        issues.push(`Low rerank retention rate (${retentionRate}%) - fusion ranking poorly aligned with reranker`);
+    }
+
    // ─────────────────────────────────────────────────────────────────
    // L1 挂载问题
    // ─────────────────────────────────────────────────────────────────
--- a/modules/story-summary/vector/retrieval/query-builder.js
+++ b/modules/story-summary/vector/retrieval/query-builder.js
@@ -2,8 +2,18 @@
 // query-builder.js - 确定性查询构建器（无 LLM）
 //
 // 职责：
-// 1. 从最近消息 + 实体词典构建 QueryBundle_v0
-// 2. 用第一轮召回结果增强为 QueryBundle_v1
+// 1. 从最近 3 条消息构建 QueryBundle（加权向量段）
+// 2. 用第一轮召回结果产出 hints 段用于 R2 增强
+//
+// 加权向量设计：
+// - 每条消息独立 embed，得到独立向量
+// - 按位置分配基础权重（焦点 > 近上下文 > 远上下文）
+// - 短消息通过 lengthFactor 自动降权（下限 35%）
+// - recall.js 负责 embed + 归一化 + 加权平均
+//
+// 焦点确定：
+// - pendingUserMessage 存在 → 它是焦点
+// - 否则 → lastMessages 最后一条是焦点
 //
 // 不负责：向量化、检索、rerank
 // ═══════════════════════════════════════════════════════════════════════════
@@ -15,12 +25,30 @@ import { filterText } from '../utils/text-filter.js';
 import { tokenizeForIndex as tokenizerTokenizeForIndex } from '../utils/tokenizer.js';

 // ─────────────────────────────────────────────────────────────────────────
-// 常量
+// 权重常量
+// ─────────────────────────────────────────────────────────────────────────
+
+// R1 base weights, laid out as [...context(oldest→newest), focus]
+// Focus message 55%, nearest context 30%, earlier context 15%
+export const FOCUS_BASE_WEIGHT = 0.55;
+export const CONTEXT_BASE_WEIGHTS = [0.15, 0.30];
+
+// R2 base weights: the focus gives up some weight to the hints segment
+export const FOCUS_BASE_WEIGHT_R2 = 0.45;
+export const CONTEXT_BASE_WEIGHTS_R2 = [0.10, 0.20];
+export const HINTS_BASE_WEIGHT = 0.25;
+
+// Length penalty: linear decay below 50 characters, floored at 35%
+export const LENGTH_FULL_THRESHOLD = 50;
+export const LENGTH_MIN_FACTOR = 0.35;
+// Minimum share of the focus segment after normalization (enforced by recall.js as a hard floor)
+// Intent: a very short focus text must not be diluted too far. NOTE(review): with a 0.35 floor, the `focusWeight < 0.15` warning in metrics.js looks unreachable if the clamped weights are what gets recorded — confirm.
+export const FOCUS_MIN_NORMALIZED_WEIGHT = 0.35;
+
+// ─────────────────────────────────────────────────────────────────────────
+// 其他常量
 // ─────────────────────────────────────────────────────────────────────────

-// Zero-darkbox policy:
-// - No internal truncation. We rely on model-side truncation / provider limits.
-// - If provider rejects due to length, we fail loudly and degrade explicitly.
 const MEMORY_HINT_ATOMS_MAX = 5;
 const MEMORY_HINT_EVENTS_MAX = 3;
 const LEXICAL_TERMS_MAX = 10;
@@ -41,14 +69,6 @@ function cleanMessageText(text) {
        .trim();
 }

-/**
- * 截断文本到指定长度
- * @param {string} text
- * @param {number} maxLen
- * @returns {string}
- */
-// truncate removed by design (zero-darkbox)
-
 /**
 * 清理事件摘要（移除楼层标记）
 * @param {string} summary
@@ -61,9 +81,23 @@ function cleanSummary(summary) {
 }

 /**
- * 从文本中提取高频实词（用于词法检索）
+ * 计算长度因子
 *
- * 使用统一分词器（结巴 + 实体保护 + 停用词过滤），按频率排序
+ * charCount >= 50 → 1.0
+ * charCount = 0  → 0.35
+ * 中间线性插值
+ *
+ * @param {number} charCount - 清洗后内容字符数（不含 speaker 前缀）
+ * @returns {number} 0.35 ~ 1.0
+ */
+export function computeLengthFactor(charCount) {
+    // Factor in [LENGTH_MIN_FACTOR, 1]: linear ramp up to LENGTH_FULL_THRESHOLD chars. NaN/undefined counts take the floor (the old `<= 0` guard let them fall through and return NaN).
+    if (!(charCount > 0)) return LENGTH_MIN_FACTOR;
+    return charCount >= LENGTH_FULL_THRESHOLD ? 1.0 : LENGTH_MIN_FACTOR + (1.0 - LENGTH_MIN_FACTOR) * (charCount / LENGTH_FULL_THRESHOLD);
+}
+
+/**
+ * 从文本中提取高频实词（用于词法检索）
 *
 * @param {string} text - 清洗后的文本
 * @param {number} maxTerms - 最大词数
@@ -72,10 +106,7 @@ function cleanSummary(summary) {
 function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
    if (!text) return [];

-    // 使用统一分词器（索引用，不去重，保留词频）
    const tokens = tokenizerTokenizeForIndex(text);
-
-    // 统计词频
    const freq = new Map();
    for (const token of tokens) {
        const key = String(token || '').toLowerCase();
@@ -89,172 +120,203 @@ function extractKeyTerms(text, maxTerms = LEXICAL_TERMS_MAX) {
        .map(([term]) => term);
 }

+// ─────────────────────────────────────────────────────────────────────────
+// 类型定义
+// ─────────────────────────────────────────────────────────────────────────
+
 /**
- * 构建 rerank 专用查询（纯自然语言，不带结构标签）
- *
- * reranker（bge-reranker-v2-m3）的 query 应为自然语言文本，
- * 不含 [ENTITIES] [DIALOGUE] 等结构标签。
- *
- * @param {string[]} focusEntities - 焦点实体
- * @param {object[]} lastMessages - 最近 K 条消息
- * @param {string|null} pendingUserMessage - 待发送的用户消息
- * @param {object} context - { name1, name2 }
- * @returns {string}
+ * @typedef {object} QuerySegment
+ * @property {string} text        - 待 embed 的文本（含 speaker 前缀，纯自然语言）
+ * @property {number} baseWeight  - R1 基础权重
+ * @property {number} charCount   - 内容字符数（不含 speaker 前缀，用于 lengthFactor）
 */
-function buildRerankQuery(focusEntities, lastMessages, pendingUserMessage, context) {
-    const parts = [];
-
-    // 实体提示
-    if (focusEntities.length > 0) {
-        parts.push(`关于${focusEntities.join('、')}。`);
-    }
-
-    // 最近对话原文
-    for (const m of (lastMessages || [])) {
-        const speaker = m.is_user ? (context.name1 || '用户') : (m.name || context.name2 || '角色');
-        const clean = cleanMessageText(m.mes || '');
-        if (clean) {
-            parts.push(`${speaker}：${clean}`);
-        }
-    }
-
-    // 待发送消息
-    if (pendingUserMessage) {
-        const clean = cleanMessageText(pendingUserMessage);
-        if (clean) {
-            parts.push(`${context.name1 || '用户'}：${clean}`);
-        }
-    }
-
-    return parts.join('\n');
-}
-
-// ─────────────────────────────────────────────────────────────────────────
-// QueryBundle 类型定义（JSDoc）
-// ─────────────────────────────────────────────────────────────────────────

 /**
 * @typedef {object} QueryBundle
- * @property {string[]} focusEntities   - 焦点实体（原词形，已排除 name1）
- * @property {string}   queryText_v0    - 第一轮查询文本
- * @property {string|null} queryText_v1 - 第二轮查询文本（refinement 后填充）
- * @property {string}   rerankQuery     - rerank 用的短查询
+ * @property {QuerySegment[]}    querySegments  - R1 向量段（上下文 oldest→newest，焦点在末尾）
+ * @property {QuerySegment|null} hintsSegment   - R2 hints 段（refinement 后填充）
+ * @property {string}   rerankQuery     - rerank 用的纯自然语言查询（焦点在前）
 * @property {string[]} lexicalTerms    - MiniSearch 查询词
- * @property {Set<string>} _lexicon     - 实体词典（内部使用）
+ * @property {string[]} focusEntities   - 焦点实体（原词形，已排除 name1）
+ * @property {Set<string>}       _lexicon     - 实体词典（内部使用）
 * @property {Map<string, string>} _displayMap - 标准化→原词形映射（内部使用）
 */

 // ─────────────────────────────────────────────────────────────────────────
-// 阶段 1：构建 QueryBundle_v0
+// 内部：消息条目构建
+// ─────────────────────────────────────────────────────────────────────────
+
+/**
+ * @typedef {object} MessageEntry
+ * @property {string} text      - speaker：内容（完整文本）
+ * @property {number} charCount - 内容字符数（不含 speaker 前缀）
+ */
+
+/**
+ * Turn one chat message into a speaker-prefixed entry.
+ * @param {object} message - chat message; reads `mes`, `is_user` and `name`
+ * @param {object} context - { name1, name2 } fallback speaker names
+ * @returns {MessageEntry|null} null when `mes` is missing or cleans to an empty string
+ */
+function buildMessageEntry(message, context) {
+    const raw = message?.mes;
+    if (!raw) return null;
+
+    // Resolve the label before cleaning: user → name1, otherwise message.name → name2
+    let speaker;
+    if (message.is_user) {
+        speaker = context.name1 || '用户';
+    } else {
+        speaker = message.name || context.name2 || '角色';
+    }
+
+    const body = cleanMessageText(raw);
+    return body ? { text: `${speaker}：${body}`, charCount: body.length } : null;
+}
+
+// ─────────────────────────────────────────────────────────────────────────
+// 阶段 1：构建 QueryBundle
 // ─────────────────────────────────────────────────────────────────────────

 /**
 * 构建初始查询包
 *
- * @param {object[]} lastMessages - 最近 K=2 条消息
+ * 消息布局（K=3 时）：
+ *   msg[0] = USER(#N-2)  上下文    baseWeight = 0.15
+ *   msg[1] = AI(#N-1)    上下文    baseWeight = 0.30
+ *   msg[2] = USER(#N)    焦点      baseWeight = 0.55
+ *
+ * 焦点确定：
+ *   pendingUserMessage 存在 → 焦点，所有 lastMessages 为上下文
+ *   pendingUserMessage 不存在 → lastMessages[-1] 为焦点，其余为上下文
+ *
+ * @param {object[]} lastMessages - 最近 K 条消息（由 recall.js 传入）
 * @param {string|null} pendingUserMessage - 用户刚输入但未进 chat 的消息
- * @param {object|null} store - getSummaryStore() 返回值（可选，内部会自动获取）
- * @param {object|null} context - { name1, name2 }（可选，内部会自动获取）
+ * @param {object|null} store
+ * @param {object|null} context - { name1, name2 }
 * @returns {QueryBundle}
 */
 export function buildQueryBundle(lastMessages, pendingUserMessage, store = null, context = null) {
-    // 自动获取 store 和 context
     if (!store) store = getSummaryStore();
     if (!context) {
         const ctx = getContext();
         context = { name1: ctx.name1, name2: ctx.name2 };
     }

-    // 1. 构建实体词典
+    // 1. Entity lexicon and display-name map (used below for focus-entity extraction)
     const lexicon = buildEntityLexicon(store, context);
     const displayMap = buildDisplayNameMap(store, context);

-    // 2. 清洗消息文本
-    const dialogueLines = [];
-    const allCleanText = [];
+    // 2. Split the messages into one focus entry and the context entries
+    const contextEntries = [];
+    let focusEntry = null;
+    const allCleanTexts = [];

-    for (const m of (lastMessages || [])) {
-        const speaker = m.is_user ? (context.name1 || '用户') : (m.name || context.name2 || '角色');
-        const clean = cleanMessageText(m.mes || '');
-
-        if (clean) {
-            // 不使用楼层号，embedding 模型不需要
-            // 不截断，零暗箱
-            dialogueLines.push(`${speaker}: ${clean}`);
-            allCleanText.push(clean);
-        }
-    }
-
-    // 3. 处理 pendingUserMessage
-    let pendingClean = '';
     if (pendingUserMessage) {
-        pendingClean = cleanMessageText(pendingUserMessage);
+        // The pending message is the focus; every entry of lastMessages is context
+        const pendingClean = cleanMessageText(pendingUserMessage);
         if (pendingClean) {
-            allCleanText.push(pendingClean);
+            const speaker = context.name1 || '用户';
+            focusEntry = {
+                text: `${speaker}：${pendingClean}`,
+                charCount: pendingClean.length,
+            };
+            allCleanTexts.push(pendingClean);
+        }
+
+        for (const m of (lastMessages || [])) {
+            const entry = buildMessageEntry(m, context);
+            if (entry) {
+                contextEntries.push(entry);
+                allCleanTexts.push(cleanMessageText(m.mes));
+            }
+        }
+    } else {
+        // No pending message → the last element of lastMessages is the focus
+        const msgs = lastMessages || [];
+
+        if (msgs.length > 0) {
+            const lastMsg = msgs[msgs.length - 1];
+            const entry = buildMessageEntry(lastMsg, context);
+            if (entry) {
+                focusEntry = entry;
+                allCleanTexts.push(cleanMessageText(lastMsg.mes));
+            }
+        }
+
+        for (let i = 0; i < msgs.length - 1; i++) {
+            const entry = buildMessageEntry(msgs[i], context);
+            if (entry) {
+                contextEntries.push(entry);
+                allCleanTexts.push(cleanMessageText(msgs[i].mes));
+            }
         }
     }

-    // 4. 提取焦点实体
-    const combinedText = allCleanText.join(' ');
+    // 3. Extract focus entities from all cleaned texts (focus text first, then context)
+    const combinedText = allCleanTexts.join(' ');
     const focusEntities = extractEntitiesFromText(combinedText, lexicon, displayMap);

-    // 5. 构建 queryText_v0
-    const queryParts = [];
+    // 4. Build querySegments
+    //    Context first (oldest → newest), focus last
+    //    Context weights are taken from CONTEXT_BASE_WEIGHTS, aligned to its tail
+    const querySegments = [];

-    if (focusEntities.length > 0) {
-        queryParts.push(`[ENTITIES]\n${focusEntities.join('\n')}`);
+    for (let i = 0; i < contextEntries.length; i++) {
+        const weightIdx = Math.max(0, CONTEXT_BASE_WEIGHTS.length - contextEntries.length + i); // clamped at 0 when there are more entries than weights
+        querySegments.push({
+            text: contextEntries[i].text,
+            baseWeight: CONTEXT_BASE_WEIGHTS[weightIdx] || CONTEXT_BASE_WEIGHTS[0], // falls back to entry 0 when the lookup is falsy
+            charCount: contextEntries[i].charCount,
+        });
     }

-    if (dialogueLines.length > 0) {
-        queryParts.push(`[DIALOGUE]\n${dialogueLines.join('\n')}`);
+    if (focusEntry) {
+        querySegments.push({
+            text: focusEntry.text,
+            baseWeight: FOCUS_BASE_WEIGHT,
+            charCount: focusEntry.charCount,
+        });
     }

-    if (pendingClean) {
-        // 不截断，零暗箱
-        queryParts.push(`[PENDING_USER]\n${pendingClean}`);
-    }
+    // 5. rerankQuery: focus line first, then the context lines; plain speaker-prefixed text, no section tags
+    const contextLines = contextEntries.map(e => e.text);
+    const rerankQuery = focusEntry
+        ? [focusEntry.text, ...contextLines].join('\n')
+        : contextLines.join('\n');

-    const queryText_v0 = queryParts.join('\n\n');
-
-    // 6. rerankQuery 独立构建（纯自然语言，供 reranker 使用）
-    const rerankQuery = buildRerankQuery(focusEntities, dialogueLines.length > 0 ? lastMessages : [], pendingUserMessage, context);
-
-    // 7. 构建 lexicalTerms
+    // 6. lexicalTerms: entities first, then key terms from the text until LEXICAL_TERMS_MAX is reached
     const entityTerms = focusEntities.map(e => e.toLowerCase());
     const textTerms = extractKeyTerms(combinedText);
-
-    // 合并去重：实体优先
     const termSet = new Set(entityTerms);
     for (const t of textTerms) {
         if (termSet.size >= LEXICAL_TERMS_MAX) break;
         termSet.add(t);
     }

-    const lexicalTerms = Array.from(termSet);
-
     return {
-        focusEntities,
-        queryText_v0,
-        queryText_v1: null,
+        querySegments,
+        hintsSegment: null,
         rerankQuery,
-        lexicalTerms,
+        lexicalTerms: Array.from(termSet),
+        focusEntities,
         _lexicon: lexicon,
         _displayMap: displayMap,
     };
 }

 // ─────────────────────────────────────────────────────────────────────────
-// 阶段 3：Query Refinement（用第一轮召回结果增强）
+// 阶段 3：Query Refinement（用第一轮召回结果产出 hints 段）
 // ─────────────────────────────────────────────────────────────────────────

 /**
 * 用第一轮召回结果增强 QueryBundle
 *
 * 原地修改 bundle：
- * - queryText_v1 = queryText_v0 + [MEMORY_HINTS]
- * - focusEntities 可能扩展（从 anchorHits 的 subject/object 中补充）
- * - rerankQuery 追加 memory hints 关键词
- * - lexicalTerms 追加 memory hints 关键词
+ * - hintsSegment：填充 hints 段（供 R2 加权使用）
+ * - focusEntities：可能从 anchor hits 的 subject/object 中扩展
+ * - lexicalTerms：可能追加 hints 中的关键词
+ * - rerankQuery：不变（保持焦点优先的纯自然语言）
 *
 * @param {QueryBundle} bundle - 原始查询包
 * @param {object[]} anchorHits - 第一轮 L0 命中（按相似度降序）
@@ -267,10 +329,7 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
    const topAnchors = (anchorHits || []).slice(0, MEMORY_HINT_ATOMS_MAX);
    for (const hit of topAnchors) {
        const semantic = hit.atom?.semantic || '';
-        if (semantic) {
-            // 不截断，零暗箱
-            hints.push(semantic);
-        }
+        if (semantic) hints.push(semantic);
    }

    // 2. 从 top eventHits 提取 memory hints
@@ -282,18 +341,19 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
        const line = title && summary
            ? `${title}: ${summary}`
            : title || summary;
-        if (line) {
-            // 不截断，零暗箱
-            hints.push(line);
-        }
+        if (line) hints.push(line);
    }

-    // 3. 构建 queryText_v1（Hints 前置，最优先）
+    // 3. 构建 hintsSegment
    if (hints.length > 0) {
-        const hintText = `[MEMORY_HINTS]\n${hints.join('\n')}`;
-        bundle.queryText_v1 = hintText + `\n\n` + bundle.queryText_v0;
+        const hintsText = hints.join('\n');
+        bundle.hintsSegment = {
+            text: hintsText,
+            baseWeight: HINTS_BASE_WEIGHT,
+            charCount: hintsText.length,
+        };
    } else {
-        bundle.queryText_v1 = bundle.queryText_v0;
+        bundle.hintsSegment = null;
    }

    // 4. 从 anchorHits 补充 focusEntities
@@ -307,10 +367,13 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
            const atom = hit.atom;
            if (!atom) continue;

-            // 检查 subject 和 object
            for (const field of [atom.subject, atom.object]) {
                if (!field) continue;
-                const norm = String(field).normalize('NFKC').replace(/[\u200B-\u200D\uFEFF]/g, '').trim().toLowerCase();
+                const norm = String(field)
+                    .normalize('NFKC')
+                    .replace(/[\u200B-\u200D\uFEFF]/g, '')
+                    .trim()
+                    .toLowerCase();
                if (norm.length >= 2 && lexicon.has(norm) && !existingSet.has(norm)) {
                    existingSet.add(norm);
                    const display = displayMap?.get(norm) || field;
@@ -320,8 +383,9 @@ export function refineQueryBundle(bundle, anchorHits, eventHits) {
        }
    }

-    // 5. rerankQuery 保持独立（不随 refinement 变更）
-    // reranker 需要纯自然语言 query，不受 memory hints 干扰
+    // 5. rerankQuery 不变
+    //    cross-encoder 接收纯自然语言 query，不受 hints 干扰
+    //    焦点消息始终在前，保证 reranker 内部截断时保留最关键内容

    // 6. 增强 lexicalTerms
    if (hints.length > 0) {
--- a/modules/story-summary/vector/retrieval/recall.js
+++ b/modules/story-summary/vector/retrieval/recall.js
@@ -1,15 +1,22 @@
 // ═══════════════════════════════════════════════════════════════════════════
-// Story Summary - Recall Engine (v7 - Two-Stage: L0 Locate → L1 Evidence)
+// Story Summary - Recall Engine (v8 - Weighted Query Vectors + Floor Aggregation)
 //
 // 命名规范：
 // - 存储层用 L0/L1/L2/L3（StateAtom/Chunk/Event/Fact）
 // - 召回层用语义名称：anchor/evidence/event/constraint
 //
+// v7 → v8 变更：
+// - Query 取 3 条消息（对齐 L0 对结构），加权向量合成替代文本拼接
+// - R1 权重 [0.15, 0.30, 0.55]（焦点 > 近上下文 > 远上下文）
+// - R2 复用 R1 向量 + embed hints 1 条，权重 [0.10, 0.20, 0.45, 0.25]
+// - Dense floor 聚合：max → maxSim×0.6 + meanSim×0.4
+// - Lexical floor 聚合：max → maxScore × (1 + 0.3×log₂(hitCount))
+//
 // 架构：
 // 阶段 1: Query Build（确定性，无 LLM）
-// 阶段 2: Round 1 Dense Retrieval（L0 + L2）
-// 阶段 3: Query Refinement（用已命中记忆增强）
-// 阶段 4: Round 2 Dense Retrieval（L0 + L2）
+// 阶段 2: Round 1 Dense Retrieval（batch embed 3 段 → 加权平均）
+// 阶段 3: Query Refinement（用已命中记忆产出 hints 段）
+// 阶段 4: Round 2 Dense Retrieval（复用 R1 vec + embed hints → 加权平均）
 // 阶段 5: Lexical Retrieval
 // 阶段 6: Floor W-RRF Fusion + Rerank + L1 配对
 // 阶段 7: L1 配对组装（L0 → top-1 AI L1 + top-1 USER L1）
@@ -21,7 +28,14 @@ import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js';
 import { getEngineFingerprint, embed } from '../utils/embedder.js';
 import { xbLog } from '../../../../core/debug-core.js';
 import { getContext } from '../../../../../../../extensions.js';
-import { buildQueryBundle, refineQueryBundle } from './query-builder.js';
+import {
+    buildQueryBundle,
+    refineQueryBundle,
+    computeLengthFactor,
+    FOCUS_BASE_WEIGHT_R2,
+    CONTEXT_BASE_WEIGHTS_R2,
+    FOCUS_MIN_NORMALIZED_WEIGHT,
+} from './query-builder.js';
 import { getLexicalIndex, searchLexicalIndex } from './lexical-index.js';
 import { rerankChunks } from '../llm/reranker.js';
 import { createMetrics, calcSimilarityStats } from './metrics.js';
@@ -33,8 +47,9 @@ const MODULE_ID = 'recall';
 // ═══════════════════════════════════════════════════════════════════════════

 const CONFIG = {
-    // 窗口
-    LAST_MESSAGES_K: 2,
+    // 窗口：取 3 条消息（对齐 L0 USER+AI 对结构）
+    LAST_MESSAGES_K: 3,
+    LAST_MESSAGES_K_WITH_PENDING: 2, // pending 存在时只取 2 条上下文，避免形成 4 段

    // Anchor (L0 StateAtoms)
    ANCHOR_MIN_SIMILARITY: 0.58,
@@ -51,6 +66,13 @@ const CONFIG = {
    RRF_W_LEX: 0.9,
    FUSION_CAP: 60,

+    // Dense floor 聚合权重
+    DENSE_AGG_W_MAX: 0.6,
+    DENSE_AGG_W_MEAN: 0.4,
+
+    // Lexical floor 聚合密度加成
+    LEX_DENSITY_BONUS: 0.3,
+
    // Rerank（floor-level）
    RERANK_TOP_N: 20,
    RERANK_MIN_SCORE: 0.15,
@@ -66,9 +88,6 @@ const CONFIG = {

 /**
 * 计算余弦相似度
- * @param {number[]} a
- * @param {number[]} b
- * @returns {number}
 */
 function cosineSimilarity(a, b) {
    if (!a?.length || !b?.length || a.length !== b.length) return 0;
@@ -83,8 +102,6 @@ function cosineSimilarity(a, b) {

 /**
 * 标准化字符串
- * @param {string} s
- * @returns {string}
 */
 function normalize(s) {
    return String(s || '')
@@ -96,12 +113,8 @@ function normalize(s) {

 /**
 * 获取最近消息
- * @param {object[]} chat
- * @param {number} count
- * @param {boolean} excludeLastAi
- * @returns {object[]}
 */
-function getLastMessages(chat, count = 2, excludeLastAi = false) {
+function getLastMessages(chat, count = 3, excludeLastAi = false) {
    if (!chat?.length) return [];
    let messages = [...chat];
    if (excludeLastAi && messages.length > 0 && !messages[messages.length - 1]?.is_user) {
@@ -111,18 +124,128 @@ function getLastMessages(chat, count = 2, excludeLastAi = false) {
 }

 // ═══════════════════════════════════════════════════════════════════════════
-// MMR 选择算法
+// 加权向量工具
 // ═══════════════════════════════════════════════════════════════════════════

 /**
- * Maximal Marginal Relevance 选择
- * @param {object[]} candidates
- * @param {number} k
- * @param {number} lambda
- * @param {Function} getVector
- * @param {Function} getScore
- * @returns {object[]}
+ * 多向量加权平均
+ *
+ * @param {number[][]} vectors - 向量数组
+ * @param {number[]} weights  - 归一化后的权重（sum = 1）
+ * @returns {number[]|null}
 */
+function weightedAverageVectors(vectors, weights) {
+    if (!vectors?.length || !weights?.length || vectors.length !== weights.length) return null;
+
+    // Take the dimensionality from the first usable vector; reading vectors[0] directly threw when that slot was null.
+    const dims = vectors.find(v => v?.length > 0)?.length;
+    if (!dims) return null;
+    const result = new Array(dims).fill(0);
+
+    for (let i = 0; i < vectors.length; i++) {
+        const v = vectors[i];
+        // Skip missing or wrong-sized vectors (a short one used to add NaN components); their weight is dropped, not redistributed.
+        if (v?.length !== dims) continue;
+        for (let d = 0; d < dims; d++) result[d] += weights[i] * v[d];
+    }
+
+    return result;
+}
+
+/**
+ * Enforce a floor on one entry of a normalized weight list, shrinking the other entries proportionally.
+ *
+ * @param {number[]} weights   - weights assumed to sum to ~1 (the rescale uses `1 - weights[targetIdx]`)
+ * @param {number} targetIdx   - index to protect; out-of-range returns `weights` unchanged
+ * @param {number} minWeight   - floor for that index
+ * @returns {number[]} the same array when no change is needed, otherwise a new array ([] for empty/missing input)
+ */
+function clampMinNormalizedWeight(weights, targetIdx, minWeight) {
+    const size = weights?.length;
+    if (!size) return [];
+    const outOfRange = targetIdx < 0 || targetIdx >= size;
+    if (outOfRange || weights[targetIdx] >= minWeight) return weights;
+
+    // Mass held by every other slot, assuming the input sums to 1
+    const rest = 1 - weights[targetIdx];
+    if (rest <= 0) {
+        const solo = new Array(size).fill(0);
+        solo[targetIdx] = 1;
+        return solo;
+    }
+
+    const shrink = (1 - minWeight) / rest;
+    const clamped = weights.map((w, i) => (i === targetIdx ? minWeight : w * shrink));
+
+    // Fold floating-point residue back into the target slot so the total stays at 1
+    let total = 0;
+    for (const w of clamped) total += w;
+    clamped[targetIdx] += 1 - total;
+    return clamped;
+}
+
+/**
+ * R1 weights: baseWeight × computeLengthFactor(charCount) per segment, normalized, then focus-floored.
+ *
+ * @param {object[]} segments - QuerySegment[]; the last element is treated as the focus
+ * @returns {number[]} one weight per segment ([] for empty/missing input)
+ */
+function computeSegmentWeights(segments) {
+    const count = segments?.length;
+    if (!count) return [];
+
+    const raw = segments.map(({ baseWeight, charCount }) => baseWeight * computeLengthFactor(charCount));
+    let total = 0;
+    for (const value of raw) total += value;
+
+    // A non-positive total falls back to a uniform split
+    const normalized = total <= 0 ? raw.map(() => 1 / count) : raw.map(v => v / total);
+    // The last segment is the focus and is floored at FOCUS_MIN_NORMALIZED_WEIGHT
+    return clampMinNormalizedWeight(normalized, count - 1, FOCUS_MIN_NORMALIZED_WEIGHT);
+}
+
+/**
+ * R2 weights: R2 base weights for the R1 segments plus an optional hints weight, normalized, then focus-floored.
+ *
+ * @param {object[]} segments        - QuerySegment[] (the same segments used for R1; the last one is the focus)
+ * @param {object|null} hintsSegment - { baseWeight, charCount } of the hints segment, or null/undefined
+ * @returns {number[]} segments.length weights, plus one trailing hints weight when hintsSegment is given
+ */
+function computeR2Weights(segments, hintsSegment) {
+    const segCount = segments?.length;
+    if (!segCount) return [];
+
+    // Tail-align the R2 context table onto the context segments.
+    // When there are more context segments than table entries, the surplus leading ones reuse entry 0.
+    const focusIdx = segCount - 1;
+    const offset = CONTEXT_BASE_WEIGHTS_R2.length - focusIdx;
+    const baseFor = (i) => (i === focusIdx
+        ? FOCUS_BASE_WEIGHT_R2
+        : CONTEXT_BASE_WEIGHTS_R2[Math.max(0, offset + i)] || CONTEXT_BASE_WEIGHTS_R2[0]);
+
+    // Scale every base weight by its segment's length factor
+    const raw = segments.map((seg, i) => baseFor(i) * computeLengthFactor(seg.charCount));
+
+    // The optional hints segment is appended after the focus
+    if (hintsSegment) {
+        raw.push(hintsSegment.baseWeight * computeLengthFactor(hintsSegment.charCount));
+    }
+
+    // Normalize (uniform split when the total is not positive)
+    let total = 0;
+    for (const value of raw) total += value;
+    const normalized = total <= 0
+        ? raw.map(() => 1 / raw.length)
+        : raw.map(v => v / total);
+
+    // The focus keeps its position (last of `segments`) even when a hints weight follows it
+    return clampMinNormalizedWeight(normalized, focusIdx, FOCUS_MIN_NORMALIZED_WEIGHT);
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// MMR 选择算法
+// ═══════════════════════════════════════════════════════════════════════════
+
 function mmrSelect(candidates, k, lambda, getVector, getScore) {
    const selected = [];
    const ids = new Set();
@@ -166,13 +289,6 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
 // [Anchors] L0 StateAtoms 检索
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * 检索语义锚点
- * @param {number[]} queryVector
- * @param {object} vectorConfig
- * @param {object|null} metrics
- * @returns {Promise<{hits: object[], floors: Set<number>}>}
- */
 async function recallAnchors(queryVector, vectorConfig, metrics) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length) {
@@ -228,15 +344,6 @@ async function recallAnchors(queryVector, vectorConfig, metrics) {
 // [Events] L2 Events 检索
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * 检索事件
- * @param {number[]} queryVector
- * @param {object[]} allEvents
- * @param {object} vectorConfig
- * @param {string[]} focusEntities
- * @param {object|null} metrics
- * @returns {Promise<object[]>}
- */
 async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities, metrics) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length || !allEvents?.length) {
@@ -344,11 +451,6 @@ async function recallEvents(queryVector, allEvents, vectorConfig, focusEntities,
 // [Causation] 因果链追溯
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * 构建事件索引
- * @param {object[]} allEvents
- * @returns {Map<string, object>}
- */
 function buildEventIndex(allEvents) {
    const map = new Map();
    for (const e of allEvents || []) {
@@ -357,13 +459,6 @@ function buildEventIndex(allEvents) {
    return map;
 }

-/**
- * 追溯因果链
- * @param {object[]} eventHits
- * @param {Map<string, object>} eventIndex
- * @param {number} maxDepth
- * @returns {{results: object[], maxDepth: number}}
- */
 function traceCausation(eventHits, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MAX_DEPTH) {
    const out = new Map();
    const idRe = /^evt-\d+$/;
@@ -411,23 +506,9 @@ function traceCausation(eventHits, eventIndex, maxDepth = CONFIG.CAUSAL_CHAIN_MA
 }

 // ═══════════════════════════════════════════════════════════════════════════
-// [W-RRF] 加权倒数排名融合（L0-only）
+// [W-RRF] 加权倒数排名融合（floor 粒度）
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * @typedef {object} RankedItem
- * @property {string} id - 唯一标识符
- * @property {number} score - 该路的原始分数
- */
-
-/**
- * W-RRF 加权倒数排名融合（floor 粒度）
- *
- * @param {{id: number, score: number}[]} denseRank  - Dense 路（floor → max cosine，降序）
- * @param {{id: number, score: number}[]} lexRank    - Lexical 路（floor → max bm25，降序）
- * @param {number} cap - 输出上限
- * @returns {{top: {id: number, fusionScore: number}[], totalUnique: number}}
- */
 function fuseByFloor(denseRank, lexRank, cap = CONFIG.FUSION_CAP) {
    const k = CONFIG.RRF_K;
    const wD = CONFIG.RRF_W_DENSE;
@@ -464,16 +545,6 @@ function fuseByFloor(denseRank, lexRank, cap = CONFIG.FUSION_CAP) {
 // [Stage 6] Floor 融合 + Rerank + L1 配对
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * Floor 粒度融合 + Rerank + L1 配对
- *
- * @param {object[]} anchorHits - L0 dense 命中（Round 2）
- * @param {number[]} queryVector - 查询向量（v1）
- * @param {string} rerankQuery - rerank 查询文本（纯自然语言）
- * @param {object} lexicalResult - 词法检索结果
- * @param {object} metrics
- * @returns {Promise<{l0Selected: object[], l1ByFloor: Map<number, {aiTop1: object|null, userTop1: object|null}>}>}
- */
 async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexicalResult, metrics) {
    const { chatId, chat, name1, name2 } = getContext();
    if (!chatId) return { l0Selected: [], l1ByFloor: new Map() };
@@ -481,26 +552,36 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
    const T_Start = performance.now();

    // ─────────────────────────────────────────────────────────────────
-    // 6a. Dense floor rank（每个 floor 取 max cosine）
+    // 6a. Dense floor rank（加权聚合：maxSim×0.6 + meanSim×0.4）
    // ─────────────────────────────────────────────────────────────────

-    const denseFloorMap = new Map();
+    const denseFloorAgg = new Map();
    for (const a of (anchorHits || [])) {
-        const cur = denseFloorMap.get(a.floor) || 0;
-        if (a.similarity > cur) denseFloorMap.set(a.floor, a.similarity);
+        const cur = denseFloorAgg.get(a.floor);
+        if (!cur) {
+            denseFloorAgg.set(a.floor, { maxSim: a.similarity, hitCount: 1, sumSim: a.similarity });
+        } else {
+            cur.maxSim = Math.max(cur.maxSim, a.similarity);
+            cur.hitCount++;
+            cur.sumSim += a.similarity;
+        }
    }

-    const denseFloorRank = [...denseFloorMap.entries()]
-        .sort((a, b) => b[1] - a[1])
-        .map(([floor, score]) => ({ id: floor, score }));
+    const denseFloorRank = [...denseFloorAgg.entries()]
+        .map(([floor, info]) => ({
+            id: floor,
+            score: info.maxSim * CONFIG.DENSE_AGG_W_MAX
+                + (info.sumSim / info.hitCount) * CONFIG.DENSE_AGG_W_MEAN,
+        }))
+        .sort((a, b) => b.score - a.score);

    // ─────────────────────────────────────────────────────────────────
-    // 6b. Lexical floor rank（chunkScores → floor 聚合 + USER→AI 映射 + 预过滤）
+    // 6b. Lexical floor rank（密度加成：maxScore × (1 + 0.3×log₂(hitCount))）
    // ─────────────────────────────────────────────────────────────────

    const atomFloorSet = new Set(getStateAtoms().map(a => a.floor));

-    const lexFloorScores = new Map();
+    const lexFloorAgg = new Map();
    for (const { chunkId, score } of (lexicalResult?.chunkScores || [])) {
        const match = chunkId?.match(/^c-(\d+)-/);
        if (!match) continue;
@@ -519,13 +600,21 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
        // 预过滤：必须有 L0 atoms
        if (!atomFloorSet.has(floor)) continue;

-        const cur = lexFloorScores.get(floor) || 0;
-        if (score > cur) lexFloorScores.set(floor, score);
+        const cur = lexFloorAgg.get(floor);
+        if (!cur) {
+            lexFloorAgg.set(floor, { maxScore: score, hitCount: 1 });
+        } else {
+            cur.maxScore = Math.max(cur.maxScore, score);
+            cur.hitCount++;
+        }
    }

-    const lexFloorRank = [...lexFloorScores.entries()]
-        .sort((a, b) => b[1] - a[1])
-        .map(([floor, score]) => ({ id: floor, score }));
+    const lexFloorRank = [...lexFloorAgg.entries()]
+        .map(([floor, info]) => ({
+            id: floor,
+            score: info.maxScore * (1 + CONFIG.LEX_DENSITY_BONUS * Math.log2(Math.max(1, info.hitCount))),
+        }))
+        .sort((a, b) => b.score - a.score);

    // ─────────────────────────────────────────────────────────────────
    // 6c. Floor W-RRF 融合
@@ -541,6 +630,8 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
        metrics.fusion.totalUnique = totalUnique;
        metrics.fusion.afterCap = fusedFloors.length;
        metrics.fusion.time = fusionTime;
+        metrics.fusion.denseAggMethod = `max×${CONFIG.DENSE_AGG_W_MAX}+mean×${CONFIG.DENSE_AGG_W_MEAN}`;
+        metrics.fusion.lexDensityBonus = CONFIG.LEX_DENSITY_BONUS;
        metrics.evidence.floorCandidates = fusedFloors.length;
    }

@@ -617,7 +708,7 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
    }

    // ─────────────────────────────────────────────────────────────────
-    // 6f. 并发 Rerank
+    // 6f. Rerank
    // ─────────────────────────────────────────────────────────────────

    const T_Rerank_Start = performance.now();
@@ -647,7 +738,6 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
            };
        }

-        // document 平均长度
        if (rerankCandidates.length > 0) {
            const totalLen = rerankCandidates.reduce((s, c) => s + (c.text?.length || 0), 0);
            metrics.evidence.rerankDocAvgLength = Math.round(totalLen / rerankCandidates.length);
@@ -666,6 +756,13 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
        atomsByFloor.get(atom.floor).push(atom);
    }

+    // 重建 denseFloorMap 以获取每层 max cosine（用于 L0 similarity 标注）
+    const denseFloorMaxMap = new Map();
+    for (const a of (anchorHits || [])) {
+        const cur = denseFloorMaxMap.get(a.floor) || 0;
+        if (a.similarity > cur) denseFloorMaxMap.set(a.floor, a.similarity);
+    }
+
    const l0Selected = [];
    const l1ByFloor = new Map();
    let contextPairsAdded = 0;
@@ -673,9 +770,9 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic
    for (const item of reranked) {
        const floor = item.floor;
        const rerankScore = item._rerankScore || 0;
-        const denseSim = denseFloorMap.get(floor) || 0;
+        const denseSim = denseFloorMaxMap.get(floor) || 0;

-        // 收集该 floor 所有 L0 atoms，共享 floor 的 rerankScore
+        // 收集该 floor 所有 L0 atoms
        const floorAtoms = atomsByFloor.get(floor) || [];
        for (const atom of floorAtoms) {
            l0Selected.push({
@@ -735,22 +832,14 @@ async function locateAndPullEvidence(anchorHits, queryVector, rerankQuery, lexic

    return { l0Selected, l1ByFloor };
 }
-// [L1] 拉取 + Cosine 打分（并发子任务）
+
+// ═══════════════════════════════════════════════════════════════════════════
+// [L1] 拉取 + Cosine 打分
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * 从 IndexedDB 拉取指定楼层的 L1 chunks + 向量，用 queryVector cosine 打分
- *
- * @param {string} chatId
- * @param {number[]} floors - 需要拉取的楼层列表
- * @param {number[]} queryVector - 查询向量（v1）
- * @param {object[]} chat - 聊天消息数组
- * @returns {Promise<Map<number, object[]>>} floor → scored chunks（带 _cosineScore）
- */
 async function pullAndScoreL1(chatId, floors, queryVector, chat) {
    const T0 = performance.now();

-    /** @type {Map<number, object[]>} */
    const result = new Map();

    if (!chatId || !floors?.length || !queryVector?.length) {
@@ -758,7 +847,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) {
        return result;
    }

-    // 拉取 chunks
    let dbChunks = [];
    try {
        dbChunks = await getChunksByFloors(chatId, floors);
@@ -773,7 +861,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) {
        return result;
    }

-    // 拉取向量
    const chunkIds = dbChunks.map(c => c.chunkId);
    let chunkVectors = [];
    try {
@@ -786,7 +873,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) {

    const vectorMap = new Map(chunkVectors.map(v => [v.chunkId, v.vector]));

-    // Cosine 打分 + 按楼层分组
    for (const chunk of dbChunks) {
        const vec = vectorMap.get(chunk.chunkId);
        const cosineScore = vec?.length ? cosineSimilarity(queryVector, vec) : 0;
@@ -807,7 +893,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) {
        result.get(chunk.floor).push(scored);
    }

-    // 每楼层按 cosine 降序排序
    for (const [, chunks] of result) {
        chunks.sort((a, b) => b._cosineScore - a._cosineScore);
    }
@@ -825,16 +910,6 @@ async function pullAndScoreL1(chatId, floors, queryVector, chat) {
 // 主函数
 // ═══════════════════════════════════════════════════════════════════════════

-/**
- * 执行记忆召回
- *
- * @param {object[]} allEvents - 所有事件（L2）
- * @param {object} vectorConfig - 向量配置
- * @param {object} options
- * @param {boolean} options.excludeLastAi
- * @param {string|null} options.pendingUserMessage
- * @returns {Promise<object>}
- */
 export async function recallMemory(allEvents, vectorConfig, options = {}) {
    const T0 = performance.now();
    const { chat } = getContext();
@@ -865,7 +940,10 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {

    const T_Build_Start = performance.now();

-    const lastMessages = getLastMessages(chat, CONFIG.LAST_MESSAGES_K, excludeLastAi);
+    const lastMessagesCount = pendingUserMessage
+        ? CONFIG.LAST_MESSAGES_K_WITH_PENDING
+        : CONFIG.LAST_MESSAGES_K;
+    const lastMessages = getLastMessages(chat, lastMessagesCount, excludeLastAi);

    const bundle = buildQueryBundle(lastMessages, pendingUserMessage);

@@ -873,29 +951,39 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
    metrics.anchor.focusEntities = bundle.focusEntities;

    if (metrics.query?.lengths) {
-        metrics.query.lengths.v0Chars = String(bundle.queryText_v0 || '').length;
+        metrics.query.lengths.v0Chars = bundle.querySegments.reduce((sum, s) => sum + s.text.length, 0);
        metrics.query.lengths.v1Chars = null;
-        metrics.query.lengths.rerankChars = String(bundle.rerankQuery || bundle.queryText_v0 || '').length;
+        metrics.query.lengths.rerankChars = String(bundle.rerankQuery || '').length;
    }

    xbLog.info(MODULE_ID,
-        `Query Build: focus=[${bundle.focusEntities.join(',')}] lexTerms=[${bundle.lexicalTerms.slice(0, 5).join(',')}]`
+        `Query Build: focus=[${bundle.focusEntities.join(',')}] segments=${bundle.querySegments.length} lexTerms=[${bundle.lexicalTerms.slice(0, 5).join(',')}]`
    );

    // ═══════════════════════════════════════════════════════════════════
-    // 阶段 2: Round 1 Dense Retrieval
+    // 阶段 2: Round 1 Dense Retrieval（batch embed → 加权平均）
    // ═══════════════════════════════════════════════════════════════════

-    let queryVector_v0;
+    const segmentTexts = bundle.querySegments.map(s => s.text);
+    if (!segmentTexts.length) {
+        metrics.timing.total = Math.round(performance.now() - T0);
+        return {
+            events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
+            focusEntities: bundle.focusEntities,
+            elapsed: metrics.timing.total,
+            logText: 'No query segments.',
+            metrics,
+        };
+    }
+
+    let r1Vectors;
    try {
-        const [vec] = await embed([bundle.queryText_v0], vectorConfig, { timeout: 10000 });
-        queryVector_v0 = vec;
+        r1Vectors = await embed(segmentTexts, vectorConfig, { timeout: 10000 });
    } catch (e1) {
        xbLog.warn(MODULE_ID, 'Round 1 向量化失败，500ms 后重试', e1);
        await new Promise(r => setTimeout(r, 500));
        try {
-            const [vec] = await embed([bundle.queryText_v0], vectorConfig, { timeout: 15000 });
-            queryVector_v0 = vec;
+            r1Vectors = await embed(segmentTexts, vectorConfig, { timeout: 15000 });
        } catch (e2) {
            xbLog.error(MODULE_ID, 'Round 1 向量化重试仍失败', e2);
            metrics.timing.total = Math.round(performance.now() - T0);
@@ -909,13 +997,31 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
        }
    }

+    if (!r1Vectors?.length || r1Vectors.some(v => !v?.length)) {
+        metrics.timing.total = Math.round(performance.now() - T0);
+        return {
+            events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
+            focusEntities: bundle.focusEntities,
+            elapsed: metrics.timing.total,
+            logText: 'Empty query vectors (round 1).',
+            metrics,
+        };
+    }
+
+    const r1Weights = computeSegmentWeights(bundle.querySegments);
+    const queryVector_v0 = weightedAverageVectors(r1Vectors, r1Weights);
+
+    if (metrics) {
+        metrics.query.segmentWeights = r1Weights.map(w => Number(w.toFixed(3)));
+    }
+
    if (!queryVector_v0?.length) {
        metrics.timing.total = Math.round(performance.now() - T0);
        return {
            events: [], l0Selected: [], l1ByFloor: new Map(), causalChain: [],
            focusEntities: bundle.focusEntities,
            elapsed: metrics.timing.total,
-            logText: 'Empty query vector (round 1).',
+            logText: 'Weighted average produced empty vector.',
            metrics,
        };
    }
@@ -929,7 +1035,7 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
    const r1EventTime = Math.round(performance.now() - T_R1_Event_Start);

    xbLog.info(MODULE_ID,
-        `Round 1: anchors=${anchorHits_v0.length} events=${eventHits_v0.length} (anchor=${r1AnchorTime}ms event=${r1EventTime}ms)`
+        `Round 1: anchors=${anchorHits_v0.length} events=${eventHits_v0.length} weights=[${r1Weights.map(w => w.toFixed(2)).join(',')}] (anchor=${r1AnchorTime}ms event=${r1EventTime}ms)`
    );

    // ═══════════════════════════════════════════════════════════════════
@@ -943,27 +1049,44 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
    metrics.query.refineTime = Math.round(performance.now() - T_Refine_Start);
    metrics.anchor.focusEntities = bundle.focusEntities;

-    if (metrics.query?.lengths) {
-        metrics.query.lengths.v1Chars = bundle.queryText_v1 == null ? null : String(bundle.queryText_v1).length;
-        metrics.query.lengths.rerankChars = String(bundle.rerankQuery || bundle.queryText_v1 || bundle.queryText_v0 || '').length;
+    // 更新 v1 长度指标
+    if (metrics.query?.lengths && bundle.hintsSegment) {
+        metrics.query.lengths.v1Chars = metrics.query.lengths.v0Chars + bundle.hintsSegment.text.length;
    }

    xbLog.info(MODULE_ID,
-        `Refinement: focus=[${bundle.focusEntities.join(',')}] hasV1=${!!bundle.queryText_v1} (${metrics.query.refineTime}ms)`
+        `Refinement: focus=[${bundle.focusEntities.join(',')}] hasHints=${!!bundle.hintsSegment} (${metrics.query.refineTime}ms)`
    );

    // ═══════════════════════════════════════════════════════════════════
-    // 阶段 4: Round 2 Dense Retrieval
+    // 阶段 4: Round 2 Dense Retrieval（复用 R1 向量 + embed hints）
    // ═══════════════════════════════════════════════════════════════════

-    const queryTextFinal = bundle.queryText_v1 || bundle.queryText_v0;
-
    let queryVector_v1;
-    try {
-        const [vec] = await embed([queryTextFinal], vectorConfig, { timeout: 10000 });
-        queryVector_v1 = vec;
-    } catch (e) {
-        xbLog.warn(MODULE_ID, 'Round 2 向量化失败，降级使用 Round 1 向量', e);
+
+    if (bundle.hintsSegment) {
+        try {
+            const [hintsVec] = await embed([bundle.hintsSegment.text], vectorConfig, { timeout: 10000 });
+
+            if (hintsVec?.length) {
+                const r2Weights = computeR2Weights(bundle.querySegments, bundle.hintsSegment);
+                queryVector_v1 = weightedAverageVectors([...r1Vectors, hintsVec], r2Weights);
+
+                if (metrics) {
+                    metrics.query.r2Weights = r2Weights.map(w => Number(w.toFixed(3)));
+                }
+
+                xbLog.info(MODULE_ID,
+                    `Round 2 weights: [${r2Weights.map(w => w.toFixed(2)).join(',')}]`
+                );
+            } else {
+                queryVector_v1 = queryVector_v0;
+            }
+        } catch (e) {
+            xbLog.warn(MODULE_ID, 'Round 2 hints 向量化失败，降级使用 Round 1 向量', e);
+            queryVector_v1 = queryVector_v0;
+        }
+    } else {
        queryVector_v1 = queryVector_v0;
    }

@@ -1082,13 +1205,14 @@ export async function recallMemory(allEvents, vectorConfig, options = {}) {
    metrics.event.entityNames = bundle.focusEntities;
    metrics.event.entitiesUsed = bundle.focusEntities.length;

-    console.group('%c[Recall v7]', 'color: #7c3aed; font-weight: bold');
+    console.group('%c[Recall v8]', 'color: #7c3aed; font-weight: bold');
    console.log(`Total: ${metrics.timing.total}ms`);
    console.log(`Query Build: ${metrics.query.buildTime}ms | Refine: ${metrics.query.refineTime}ms`);
+    console.log(`R1 weights: [${r1Weights.map(w => w.toFixed(2)).join(', ')}]`);
    console.log(`Focus: [${bundle.focusEntities.join(', ')}]`);
    console.log(`Round 2 Anchors: ${anchorHits.length} hits → ${anchorFloors_dense.size} floors`);
    console.log(`Lexical: chunks=${lexicalResult.chunkIds.length} events=${lexicalResult.eventIds.length}`);
-    console.log(`Fusion (floor): dense=${metrics.fusion.denseFloors} lex=${metrics.fusion.lexFloors} → cap=${metrics.fusion.afterCap} (${metrics.fusion.time}ms)`);
+    console.log(`Fusion (floor, weighted): dense=${metrics.fusion.denseFloors} lex=${metrics.fusion.lexFloors} → cap=${metrics.fusion.afterCap} (${metrics.fusion.time}ms)`);
    console.log(`Floor Rerank: ${metrics.evidence.beforeRerank || 0} → ${metrics.evidence.floorsSelected || 0} floors → L0=${metrics.evidence.l0Collected || 0} (${metrics.evidence.rerankTime || 0}ms)`);
    console.log(`L1: ${metrics.evidence.l1Pulled || 0} pulled → ${metrics.evidence.l1Attached || 0} attached (${metrics.evidence.l1CosineTime || 0}ms)`);
    console.log(`Events: ${eventHits.length} hits, ${causalChain.length} causal`);