Update story summary recall and prompt

2026-02-05 00:22:02 +08:00
parent 12db08abe0
commit 8137e206f9
18 changed files with 708 additions and 406 deletions
--- a/modules/story-summary/vector/pipeline/chunk-builder.js
+++ b/modules/story-summary/vector/pipeline/chunk-builder.js
@@ -3,7 +3,7 @@
 // 标准 RAG chunking: ~200 tokens per chunk
 // ═══════════════════════════════════════════════════════════════════════════

-import { getContext } from '../../../../../../extensions.js';
+import { getContext } from '../../../../../../../extensions.js';
 import {
    getMeta,
    updateMeta,
@@ -15,10 +15,10 @@ import {
    makeChunkId,
    hashText,
    CHUNK_MAX_TOKENS,
-} from './chunk-store.js';
-import { embed, getEngineFingerprint } from './embedder.js';
-import { xbLog } from '../../../core/debug-core.js';
-import { filterText } from './text-filter.js';
+} from '../storage/chunk-store.js';
+import { embed, getEngineFingerprint } from '../utils/embedder.js';
+import { xbLog } from '../../../../core/debug-core.js';
+import { filterText } from '../utils/text-filter.js';

 const MODULE_ID = 'chunk-builder';

@@ -339,7 +339,7 @@ export async function syncOnMessageReceived(chatId, lastFloor, message, vectorCo

    // 本地模型未加载时跳过（避免意外触发下载或报错）
    if (vectorConfig.engine === "local") {
-        const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("./embedder.js");
+        const { isLocalModelLoaded, DEFAULT_LOCAL_MODEL } = await import("../utils/embedder.js");
        const modelId = vectorConfig.local?.modelId || DEFAULT_LOCAL_MODEL;
        if (!isLocalModelLoaded(modelId)) return;
    }
--- a/modules/story-summary/vector/pipeline/state-integration.js
+++ b/modules/story-summary/vector/pipeline/state-integration.js
@@ -3,8 +3,8 @@
 // 事件监听 + 回滚钩子注册
 // ═══════════════════════════════════════════════════════════════════════════

-import { getContext } from '../../../../../../extensions.js';
-import { xbLog } from '../../../core/debug-core.js';
+import { getContext } from '../../../../../../../extensions.js';
+import { xbLog } from '../../../../core/debug-core.js';
 import {
    saveStateAtoms,
    saveStateVectors,
@@ -12,9 +12,9 @@ import {
    deleteStateVectorsFromFloor,
    getStateAtoms,
    clearStateVectors,
-} from './state-store.js';
-import { embed, getEngineFingerprint } from './embedder.js';
-import { getVectorConfig } from '../data/config.js';
+} from '../storage/state-store.js';
+import { embed, getEngineFingerprint } from '../utils/embedder.js';
+import { getVectorConfig } from '../../data/config.js';

 const MODULE_ID = 'state-integration';

--- a/modules/story-summary/vector/pipeline/state-recall.js
+++ b/modules/story-summary/vector/pipeline/state-recall.js
@@ -3,11 +3,11 @@
 // L0 语义锚点召回 + floor bonus + 虚拟 chunk 转换
 // ═══════════════════════════════════════════════════════════════════════════

-import { getContext } from '../../../../../../extensions.js';
-import { getAllStateVectors, getStateAtoms } from './state-store.js';
-import { getMeta } from './chunk-store.js';
-import { getEngineFingerprint } from './embedder.js';
-import { xbLog } from '../../../core/debug-core.js';
+import { getContext } from '../../../../../../../extensions.js';
+import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js';
+import { getMeta } from '../storage/chunk-store.js';
+import { getEngineFingerprint } from '../utils/embedder.js';
+import { xbLog } from '../../../../core/debug-core.js';

 const MODULE_ID = 'state-recall';

--- a/modules/story-summary/vector/retrieval/entity.js
+++ b/modules/story-summary/vector/retrieval/entity.js
--- a/modules/story-summary/vector/retrieval/recall.js
+++ b/modules/story-summary/vector/retrieval/recall.js
@@ -1,4 +1,4 @@
-// Story Summary - Recall Engine
+// Story Summary - Recall Engine
 // L1 chunk + L2 event 召回
 // - 全量向量打分
 // - 实体权重归一化分配
@@ -8,19 +8,19 @@
 // - MMR 去重（融合后执行）
 // - floor 稀疏去重

-import { getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from './chunk-store.js';
-import { embed, getEngineFingerprint } from './embedder.js';
-import { xbLog } from '../../../core/debug-core.js';
-import { getContext } from '../../../../../../extensions.js';
-import { getSummaryStore, getFacts, getNewCharacters, isRelationFact } from '../data/store.js';
-import { filterText } from './text-filter.js';
+import { getAllChunks, getAllEventVectors, getAllChunkVectors, getChunksByFloors, getMeta } from '../storage/chunk-store.js';
+import { embed, getEngineFingerprint } from '../utils/embedder.js';
+import { xbLog } from '../../../../core/debug-core.js';
+import { getContext } from '../../../../../../../extensions.js';
+import { getSummaryStore, getFacts, getNewCharacters, isRelationFact } from '../../data/store.js';
+import { filterText } from '../utils/text-filter.js';
 import {
    searchStateAtoms,
    buildL0FloorBonus,
    stateToVirtualChunks,
    mergeAndSparsify,
-} from './state-recall.js';
-import { ensureEventTextIndex, searchEventsByText } from './text-search.js';
+} from '../pipeline/state-recall.js';
+import { ensureEventTextIndex, searchEventsByText, ensureChunkTextIndex, searchChunksByText } from './text-search.js';
 import {
    extractRareTerms,
    extractNounsFromFactsO,
@@ -29,10 +29,8 @@ import {
 const MODULE_ID = 'recall';

 const CONFIG = {
-    QUERY_MSG_COUNT: 5,
-    QUERY_DECAY_BETA: 0.7,
-    QUERY_MAX_CHARS: 600,
-    QUERY_CONTEXT_CHARS: 240,
+    QUERY_MSG_COUNT: 3,
+    QUERY_DECAY_BETA: 0.6,

    CAUSAL_CHAIN_MAX_DEPTH: 10,
    CAUSAL_INJECT_MAX: 30,
@@ -216,11 +214,26 @@ function extractRelationTarget(p) {
    return '';
 }

-function buildExpDecayWeights(n, beta) {
+/**
+ * Compute one normalized weight per query segment from its position and
+ * its content length.
+ *
+ * @param {Array} segments - Query segments; each is coerced with String() to measure its length.
+ * @param {number} [beta=0.6] - Exponent rate of the position decay.
+ * @returns {number[]} Weights that sum to 1 ([] for no segments, [1.0] for one).
+ */
+function buildContentAwareWeights(segments, beta = 0.6) {
+    const n = segments.length;
+    if (n === 0) return [];
+    if (n === 1) return [1.0];
+
     const last = n - 1;
-    const w = Array.from({ length: n }, (_, i) => Math.exp(beta * (i - last)));
-    const sum = w.reduce((a, b) => a + b, 0) || 1;
-    return w.map(x => x / sum);
+    const SHORT_THRESHOLD = 15;
+    const raw = [];
+
+    for (let i = 0; i < n; i++) {
+        // Position decay: the last segment gets 1, each step earlier is scaled by exp(-beta).
+        const posWeight = Math.exp(beta * (i - last));
+        // Length is measured with all whitespace removed.
+        const len = String(segments[i] || '').replace(/\s+/g, '').length;
+        // Segments shorter than SHORT_THRESHOLD are damped by sqrt(len / threshold), never below 0.3.
+        const contentFactor = len >= SHORT_THRESHOLD
+            ? 1.0
+            : Math.max(0.3, Math.sqrt(len / SHORT_THRESHOLD));
+        raw.push(posWeight * contentFactor);
+    }
+
+    // `|| 1` avoids dividing by zero when the raw weights sum to 0.
+    const sum = raw.reduce((a, b) => a + b, 0) || 1;
+    return raw.map(w => w / sum);
 }

 // ═══════════════════════════════════════════════════════════════════════════
@@ -247,19 +260,16 @@ function buildQuerySegments(chat, count, excludeLastAi, pendingUserMessage = nul
        }
    }

-    return messages.slice(-count).map((m, idx, arr) => {
-        const speaker = m.name || (m.is_user ? (name1 || "用户") : "角色");
-        const clean = cleanForRecall(m.mes);
-        if (!clean) return '';
-        const limit = idx === arr.length - 1 ? CONFIG.QUERY_MAX_CHARS : CONFIG.QUERY_CONTEXT_CHARS;
-        return `${speaker}: ${clean.slice(0, limit)}`;
-    }).filter(Boolean);
+    return messages.slice(-count)
+        .map((m) => cleanForRecall(m.mes) || '')
+        .filter(Boolean);
 }

 async function embedWeightedQuery(segments, vectorConfig) {
    if (!segments?.length) return null;

-    const weights = buildExpDecayWeights(segments.length, CONFIG.QUERY_DECAY_BETA);
+    const weights = buildContentAwareWeights(segments, CONFIG.QUERY_DECAY_BETA);
+
    const vecs = await embed(segments, vectorConfig);
    const dims = vecs?.[0]?.length || 0;
    if (!dims) return null;
@@ -377,19 +387,6 @@ function expandByFacts(presentEntities, facts, maxDepth = 2) {
 // 实体权重归一化（用于加分分配）
 // ═══════════════════════════════════════════════════════════════════════════

-function normalizeEntityWeights(queryEntityWeights) {
-    if (!queryEntityWeights?.size) return new Map();
-
-    const total = Array.from(queryEntityWeights.values()).reduce((a, b) => a + b, 0);
-    if (total <= 0) return new Map();
-
-    const normalized = new Map();
-    for (const [entity, weight] of queryEntityWeights) {
-        normalized.set(entity, weight / total);
-    }
-    return normalized;
-}
-
 // ═══════════════════════════════════════════════════════════════════════════
 // 文本路 Query 构建（分层高信号词）
 // ═══════════════════════════════════════════════════════════════════════════
@@ -548,7 +545,167 @@ function mmrSelect(candidates, k, lambda, getVector, getScore) {
 // L1 Chunks 检索
 // ═══════════════════════════════════════════════════════════════════════════

-async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1) {
+/**
+ * L2 event retrieval: vector scoring and text search fused by RRF, then
+ * MMR selection, then a cap on text-only hits.
+ *
+ * @param {Array} queryVector - Query embedding; a missing or empty vector yields [].
+ * @param {string} queryTextForSearch - Query string passed to the event text index.
+ * @param {Array<object>} allEvents - Event records (id, summary, participants are read);
+ *   null/undefined is treated as an empty list.
+ * @param {object} vectorConfig - Used to compute the engine fingerprint.
+ * @param {object} store - Only `updatedAt` is read, for the text-index revision key.
+ * @param {Set<string>} queryEntitySet - Normalized query entities; decides DIRECT vs SIMILAR.
+ * @param {Map} [l0FloorBonus] - Bonus per floor, added to the raw similarity.
+ * @returns {Promise<Array<object>>} Selected events; debug statistics are attached
+ *   to the first result only.
+ */
+async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntitySet, l0FloorBonus = new Map()) {
+    const { chatId } = getContext();
+    if (!chatId || !queryVector?.length) return [];
+
+    // Normalize once: the revision key, the scoring pass and the id lookup
+    // below all read the list, and only one of them used to guard against null.
+    const events = allEvents || [];
+
+    const meta = await getMeta(chatId);
+    const fp = getEngineFingerprint(vectorConfig);
+    // Stored fingerprint differs from the current engine config: skip recall.
+    if (meta.fingerprint && meta.fingerprint !== fp) return [];
+
+    const eventVectors = await getAllEventVectors(chatId);
+    const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
+    if (!vectorMap.size) return [];
+
+    // Build / refresh the text index
+    const revision = `${chatId}:${store?.updatedAt || 0}:${events.length}`;
+    ensureEventTextIndex(events, revision);
+
+    // Text-path retrieval
+    const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
+    const textGapInfo = textRanked._gapInfo || null;
+
+    // Vector-path retrieval
+    const scored = events.map((event, idx) => {
+        const v = vectorMap.get(event.id);
+        const rawSim = v ? cosineSimilarity(queryVector, v) : 0;
+
+        let bonus = 0;
+
+        // L0 bonus: only the first floor in the event's range that has a bonus counts.
+        const range = parseFloorRange(event.summary);
+        if (range) {
+            for (let f = range.start; f <= range.end; f++) {
+                if (l0FloorBonus.has(f)) {
+                    bonus += l0FloorBonus.get(f);
+                    break;
+                }
+            }
+        }
+
+        const participants = (event.participants || []).map(p => normalize(p));
+        const hasPresent = participants.some(p => queryEntitySet.has(p));
+
+        return {
+            _id: event.id,
+            _idx: idx,
+            event,
+            rawSim,
+            finalScore: rawSim + bonus,
+            vector: v,
+            _hasPresent: hasPresent,
+        };
+    });
+
+    const rawSimById = new Map(scored.map(s => [s._id, s.rawSim]));
+    const hasPresentById = new Map(scored.map(s => [s._id, s._hasPresent]));
+
+    const preFilterDistribution = {
+        total: scored.length,
+        '0.85+': scored.filter(s => s.finalScore >= 0.85).length,
+        '0.7-0.85': scored.filter(s => s.finalScore >= 0.7 && s.finalScore < 0.85).length,
+        '0.6-0.7': scored.filter(s => s.finalScore >= 0.6 && s.finalScore < 0.7).length,
+        '0.5-0.6': scored.filter(s => s.finalScore >= 0.5 && s.finalScore < 0.6).length,
+        '<0.5': scored.filter(s => s.finalScore < 0.5).length,
+        passThreshold: scored.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT).length,
+        threshold: CONFIG.MIN_SIMILARITY_EVENT,
+    };
+
+    const candidates = scored
+        .filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
+        .sort((a, b) => b.finalScore - a.finalScore)
+        .slice(0, CONFIG.CANDIDATE_EVENTS);
+
+    const vectorRanked = candidates.map(s => ({
+        event: s.event,
+        similarity: s.finalScore,
+        rawSim: s.rawSim,
+        vector: s.vector,
+    }));
+
+    const eventById = new Map(events.map(e => [e.id, e]));
+    const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);
+
+    // Quality gate for TEXT-only hits
+    const textOnlyStats = {
+        total: 0,
+        passedSoftCheck: 0,
+        filtered: 0,
+    };
+
+    const filtered = fused.filter(x => {
+        if (x.type !== 'TEXT') return true;
+
+        textOnlyStats.total++;
+
+        // A text-only hit must still reach the soft similarity floor.
+        const sim = x.rawSim || rawSimById.get(x.id) || 0;
+        if (sim >= CONFIG.TEXT_SOFT_MIN_SIM) {
+            textOnlyStats.passedSoftCheck++;
+            return true;
+        }
+
+        textOnlyStats.filtered++;
+        return false;
+    });
+
+    const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
+        ...x,
+        _id: x.id,
+    }));
+
+    const mmrOutput = mmrSelect(
+        mmrInput,
+        CONFIG.MAX_EVENTS,
+        CONFIG.MMR_LAMBDA,
+        c => c.vector || null,
+        c => c.rrf
+    );
+
+    // Cap on TEXT-only hits (applied after MMR)
+    let textOnlyCount = 0;
+    let textOnlyTruncated = 0;
+
+    const finalResults = mmrOutput.filter(x => {
+        if (x.type !== 'TEXT') return true;
+
+        if (textOnlyCount < CONFIG.TEXT_TOTAL_MAX) {
+            textOnlyCount++;
+            return true;
+        }
+
+        textOnlyTruncated++;
+        return false;
+    });
+
+    textOnlyStats.finalIncluded = textOnlyCount;
+    textOnlyStats.truncatedByLimit = textOnlyTruncated;
+
+    const results = finalResults.map(x => ({
+        event: x.event,
+        similarity: x.rrf,
+        _recallType: hasPresentById.get(x.event?.id) ? 'DIRECT' : 'SIMILAR',
+        _recallReason: x.type,
+        _rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
+        _rawSim: rawSimById.get(x.event?.id) || 0,
+    }));
+
+    // Statistics ride along on the first result.
+    if (results.length > 0) {
+        results[0]._preFilterDistribution = preFilterDistribution;
+        results[0]._rrfStats = {
+            vectorCount: vectorRanked.length,
+            textCount: textRanked.length,
+            hybridCount: fused.filter(x => x.type === 'HYBRID').length,
+            vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
+            textOnlyTotal: textOnlyStats.total,
+        };
+        results[0]._textOnlyStats = textOnlyStats;
+        results[0]._textGapInfo = textGapInfo;
+    }
+
+    return results;
+}
+
+async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(), lastSummarizedFloor = -1, textSearchParams = null) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length) return [];

@@ -577,6 +734,58 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
        };
    });

+    // 文本路补充（仅待整理区）
+    let textL1Stats = null;
+    const store = getSummaryStore();
+    const keepVisible = store?.keepVisibleCount ?? 3;
+    const recentStart = lastSummarizedFloor + 1;
+    const recentEnd = (meta?.lastChunkFloor ?? -1) - keepVisible;
+
+    if (textSearchParams && recentEnd >= recentStart && recentEnd >= 0) {
+        const { queryEntities, rareTerms } = textSearchParams;
+        const textQuery = [...(queryEntities || []), ...(rareTerms || [])].join(' ');
+
+        if (textQuery.trim()) {
+            const allChunks = await getAllChunks(chatId);
+            const recentChunks = allChunks.filter(c => c.floor >= recentStart && c.floor <= recentEnd);
+
+            if (recentChunks.length > 0) {
+                const revision = `${chatId}:chunk:${recentEnd}`;
+                ensureChunkTextIndex(recentChunks, revision);
+
+                const textHits = searchChunksByText(textQuery, recentStart, recentEnd, 20);
+
+                textL1Stats = {
+                    range: `${recentStart + 1}~${recentEnd + 1}`,
+                    candidates: recentChunks.length,
+                    hits: textHits.length,
+                };
+
+                for (const hit of textHits) {
+                    const existingIdx = scored.findIndex(s => s.chunkId === hit.chunkId);
+
+                    if (existingIdx >= 0) {
+                        scored[existingIdx]._hasTextHit = true;
+                        scored[existingIdx]._textRank = hit.textRank;
+                    } else {
+                        scored.push({
+                            _id: hit.chunkId,
+                            chunkId: hit.chunkId,
+                            floor: hit.floor,
+                            chunkIdx: 0,
+                            similarity: CONFIG.MIN_SIMILARITY_CHUNK_RECENT,
+                            _baseSimilarity: 0,
+                            _l0Bonus: 0,
+                            _recallReason: 'TEXT_L1',
+                            _textRank: hit.textRank,
+                            vector: null,
+                        });
+                    }
+                }
+            }
+        }
+    }
+
    const candidates = scored
        .filter(s => {
            const threshold = s.floor > lastSummarizedFloor
@@ -599,6 +808,7 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
            '0.55-0.6': scored.filter(s => s.similarity >= 0.55 && s.similarity < 0.6).length,
            '<0.55': scored.filter(s => s.similarity < 0.55).length,
        },
+        textL1: textL1Stats,
    };

    const dynamicK = Math.min(CONFIG.MAX_CHUNKS, candidates.length);
@@ -636,6 +846,8 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
            isUser: chunk.isUser,
            text: chunk.text,
            similarity: item.similarity,
+            _recallReason: item._recallReason,
+            _textRank: item._textRank,
        };
    }).filter(Boolean);

@@ -646,184 +858,6 @@ async function searchChunks(queryVector, vectorConfig, l0FloorBonus = new Map(),
    return results;
 }

-// ═══════════════════════════════════════════════════════════════════════════
-// L2 Events 检索（RRF 混合 + MMR 后置）
-// ═══════════════════════════════════════════════════════════════════════════
-
-async function searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, normalizedEntityWeights, l0FloorBonus = new Map()) {
-    const { chatId } = getContext();
-    if (!chatId || !queryVector?.length) return [];
-
-    const meta = await getMeta(chatId);
-    const fp = getEngineFingerprint(vectorConfig);
-    if (meta.fingerprint && meta.fingerprint !== fp) return [];
-
-    const eventVectors = await getAllEventVectors(chatId);
-    const vectorMap = new Map(eventVectors.map(v => [v.eventId, v.vector]));
-    if (!vectorMap.size) return [];
-
-    // 构建/更新文本索引
-    const revision = `${chatId}:${store?.updatedAt || 0}:${allEvents.length}`;
-    ensureEventTextIndex(allEvents, revision);
-
-    // 文本路检索
-    const textRanked = searchEventsByText(queryTextForSearch, CONFIG.TEXT_SEARCH_LIMIT);
-    const textGapInfo = textRanked._gapInfo || null;
-
-    // ═══════════════════════════════════════════════════════════════════════
-    // 向量路检索（只保留 L0 加权）
-    // ═══════════════════════════════════════════════════════════════════════
-
-    const ENTITY_BONUS_POOL = 0.10;
-
-    const scored = (allEvents || []).map((event, idx) => {
-        const v = vectorMap.get(event.id);
-        const rawSim = v ? cosineSimilarity(queryVector, v) : 0;
-
-        let bonus = 0;
-
-        // L0 加权
-        const range = parseFloorRange(event.summary);
-        if (range) {
-            for (let f = range.start; f <= range.end; f++) {
-                if (l0FloorBonus.has(f)) {
-                    bonus += l0FloorBonus.get(f);
-                    break;
-                }
-            }
-        }
-
-        const participants = (event.participants || []).map(p => normalize(p));
-        let maxEntityWeight = 0;
-        for (const p of participants) {
-            const w = normalizedEntityWeights.get(p) || 0;
-            if (w > maxEntityWeight) {
-                maxEntityWeight = w;
-            }
-        }
-        const entityBonus = ENTITY_BONUS_POOL * maxEntityWeight;
-        bonus += entityBonus;
-
-        return {
-            _id: event.id,
-            _idx: idx,
-            event,
-            rawSim,
-            finalScore: rawSim + bonus,
-            vector: v,
-            _entityBonus: entityBonus,
-            _hasPresent: maxEntityWeight > 0,
-        };
-    });
-
-    const rawSimById = new Map(scored.map(s => [s._id, s.rawSim]));
-    const entityBonusById = new Map(scored.map(s => [s._id, s._entityBonus]));
-    const hasPresentById = new Map(scored.map(s => [s._id, s._hasPresent]));
-
-    const preFilterDistribution = {
-        total: scored.length,
-        '0.85+': scored.filter(s => s.finalScore >= 0.85).length,
-        '0.7-0.85': scored.filter(s => s.finalScore >= 0.7 && s.finalScore < 0.85).length,
-        '0.6-0.7': scored.filter(s => s.finalScore >= 0.6 && s.finalScore < 0.7).length,
-        '0.5-0.6': scored.filter(s => s.finalScore >= 0.5 && s.finalScore < 0.6).length,
-        '<0.5': scored.filter(s => s.finalScore < 0.5).length,
-        passThreshold: scored.filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT).length,
-        threshold: CONFIG.MIN_SIMILARITY_EVENT,
-    };
-
-    const candidates = scored
-        .filter(s => s.finalScore >= CONFIG.MIN_SIMILARITY_EVENT)
-        .sort((a, b) => b.finalScore - a.finalScore)
-        .slice(0, CONFIG.CANDIDATE_EVENTS);
-
-    const vectorRanked = candidates.map(s => ({
-        event: s.event,
-        similarity: s.finalScore,
-        rawSim: s.rawSim,
-        vector: s.vector,
-    }));
-
-    const eventById = new Map(allEvents.map(e => [e.id, e]));
-    const fused = fuseEventsByRRF(vectorRanked, textRanked, eventById);
-
-    const textOnlyStats = {
-        total: 0,
-        passedSoftCheck: 0,
-        filtered: 0,
-        finalIncluded: 0,
-        truncatedByLimit: 0,
-    };
-
-    const filtered = fused.filter(x => {
-        if (x.type !== 'TEXT') return true;
-
-        textOnlyStats.total++;
-        const sim = x.rawSim || rawSimById.get(x.id) || 0;
-        if (sim >= CONFIG.TEXT_SOFT_MIN_SIM) {
-            textOnlyStats.passedSoftCheck++;
-            return true;
-        }
-
-        textOnlyStats.filtered++;
-        return false;
-    });
-
-    const mmrInput = filtered.slice(0, CONFIG.CANDIDATE_EVENTS).map(x => ({
-        ...x,
-        _id: x.id,
-    }));
-
-    const mmrOutput = mmrSelect(
-        mmrInput,
-        CONFIG.MAX_EVENTS,
-        CONFIG.MMR_LAMBDA,
-        c => c.vector || null,
-        c => c.rrf
-    );
-
-    let textOnlyCount = 0;
-    const finalResults = mmrOutput.filter(x => {
-        if (x.type !== 'TEXT') return true;
-        if (textOnlyCount < CONFIG.TEXT_TOTAL_MAX) {
-            textOnlyCount++;
-            return true;
-        }
-        textOnlyStats.truncatedByLimit++;
-        return false;
-    });
-    textOnlyStats.finalIncluded = textOnlyCount;
-
-    const results = finalResults.map(x => ({
-        event: x.event,
-        similarity: x.rrf,
-        _recallType: hasPresentById.get(x.event?.id) ? 'DIRECT' : 'SIMILAR',
-        _recallReason: x.type,
-        _rrfDetail: { vRank: x.vRank, tRank: x.tRank, rrf: x.rrf },
-        _entityBonus: entityBonusById.get(x.event?.id) || 0,
-        _rawSim: rawSimById.get(x.event?.id) || 0,
-    }));
-
-    // 统计信息附加到第一条结果
-    if (results.length > 0) {
-        results[0]._preFilterDistribution = preFilterDistribution;
-        results[0]._rrfStats = {
-            vectorCount: vectorRanked.length,
-            textCount: textRanked.length,
-            hybridCount: fused.filter(x => x.type === 'HYBRID').length,
-            vectorOnlyCount: fused.filter(x => x.type === 'VECTOR').length,
-            textOnlyTotal: textOnlyStats.total,
-        };
-        results[0]._textOnlyStats = textOnlyStats;
-        results[0]._textGapInfo = textGapInfo;
-    }
-
-    return results;
-}
-
-// ═══════════════════════════════════════════════════════════════════════════
-// 日志
-// ═══════════════════════════════════════════════════════════════════════════
-
 function formatRecallLog({
    elapsed,
    segments,
@@ -831,7 +865,7 @@ function formatRecallLog({
    chunkResults,
    eventResults,
    allEvents,
-    normalizedEntityWeights = new Map(),
+    queryEntities = [],
    causalEvents = [],
    chunkPreFilterStats = null,
    l0Results = [],
@@ -840,15 +874,15 @@ function formatRecallLog({
    textQueryBreakdown = null,
 }) {
    const lines = [
-        '\u2554' + '\u2550'.repeat(62) + '\u2557',
-        '\u2551                    记忆召回报告                              \u2551',
-        '\u2560' + '\u2550'.repeat(62) + '\u2563',
-        `\u2551  耗时: ${elapsed}ms`,
-        '\u255a' + '\u2550'.repeat(62) + '\u255d',
+        '╔' + '═'.repeat(62) + '╗',
+        '║                    记忆召回报告                              ║',
+        '╠' + '═'.repeat(62) + '╣',
+        `║  耗时: ${elapsed}ms`,
+        '╚' + '═'.repeat(62) + '╝',
        '',
-        '\u250c' + '\u2500'.repeat(61) + '\u2510',
-        '\u2502 【查询构建】最近 5 条消息，指数衰减加权 (β=0.7)              \u2502',
-        '\u2514' + '\u2500'.repeat(61) + '\u2518',
+        '┌' + '─'.repeat(61) + '┐',
+        `│ 【查询构建】最近 ${CONFIG.QUERY_MSG_COUNT} 条，内容感知加权 (β=${CONFIG.QUERY_DECAY_BETA}) │`,
+        '└' + '─'.repeat(61) + '┘',
    ];

    const segmentsSorted = segments.map((s, i) => ({
@@ -858,25 +892,19 @@ function formatRecallLog({
    })).sort((a, b) => b.weight - a.weight);

    segmentsSorted.forEach((s, rank) => {
-        const bar = '\u2588'.repeat(Math.round(s.weight * 20));
+        const bar = '█'.repeat(Math.round(s.weight * 20));
        const preview = s.text.length > 60 ? s.text.slice(0, 60) + '...' : s.text;
        const marker = rank === 0 ? ' ◀ 主导' : '';
        lines.push(`  ${(s.weight * 100).toFixed(1).padStart(5)}% ${bar.padEnd(12)} ${preview}${marker}`);
    });

    lines.push('');
-    lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
-    lines.push('\u2502 【提取实体】                                                 \u2502');
-    lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
+    lines.push('┌' + '─'.repeat(61) + '┐');
+    lines.push('│ 【提取实体】                                                 │');
+    lines.push('└' + '─'.repeat(61) + '┘');

-    if (normalizedEntityWeights?.size) {
-        const sorted = Array.from(normalizedEntityWeights.entries())
-            .sort((a, b) => b[1] - a[1])
-            .slice(0, 8);
-        const formatted = sorted
-            .map(([e, w]) => `${e}(${(w * 100).toFixed(0)}%)`)
-            .join(' | ');
-        lines.push(`  ${formatted}`);
+    if (queryEntities?.length) {
+        lines.push(`  焦点: ${queryEntities.slice(0, 8).join('、')}${queryEntities.length > 8 ? ' ...' : ''}`);
    } else {
        lines.push('  (无)');
    }
@@ -885,9 +913,9 @@ function formatRecallLog({
    }

    lines.push('');
-    lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
-    lines.push('\u2502 【文本路 Query 构成】                                        \u2502');
-    lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
+    lines.push('┌' + '─'.repeat(61) + '┐');
+    lines.push('│ 【文本路 Query 构成】                                        │');
+    lines.push('└' + '─'.repeat(61) + '┘');

    if (textQueryBreakdown) {
        const bd = textQueryBreakdown;
@@ -919,23 +947,9 @@ function formatRecallLog({
    }

    lines.push('');
-    lines.push('  实体归一化（用于加分）:');
-    if (normalizedEntityWeights?.size) {
-        const sorted = Array.from(normalizedEntityWeights.entries())
-            .sort((a, b) => b[1] - a[1])
-            .slice(0, 8);
-        const formatted = sorted
-            .map(([e, w]) => `${e}(${(w * 100).toFixed(0)}%)`)
-            .join(' | ');
-        lines.push(`    ${formatted}`);
-    } else {
-        lines.push('    (无)');
-    }
-
-    lines.push('');
-    lines.push('\u250c' + '\u2500'.repeat(61) + '\u2510');
-    lines.push('\u2502 【召回统计】                                                 \u2502');
-    lines.push('\u2514' + '\u2500'.repeat(61) + '\u2518');
+    lines.push('┌' + '─'.repeat(61) + '┐');
+    lines.push('│ 【召回统计】                                                 │');
+    lines.push('└' + '─'.repeat(61) + '┘');

    // L0
    const l0Floors = [...new Set(l0Results.map(r => r.floor))].sort((a, b) => a - b);
@@ -953,6 +967,11 @@ function formatRecallLog({
        const dist = chunkPreFilterStats.distribution || {};
        lines.push(`    全量: ${chunkPreFilterStats.total} 条 | 通过阈值(远期≥${chunkPreFilterStats.thresholdRemote}, 待整理≥${chunkPreFilterStats.thresholdRecent}): ${chunkPreFilterStats.passThreshold} 条 | 最终: ${chunkResults.length} 条`);
        lines.push(`    匹配度: 0.8+: ${dist['0.8+'] || 0} | 0.7-0.8: ${dist['0.7-0.8'] || 0} | 0.6-0.7: ${dist['0.6-0.7'] || 0}`);
+
+        const textL1 = chunkPreFilterStats.textL1;
+        if (textL1) {
+            lines.push(`    文本路补充（待整理区）: 范围 ${textL1.range}楼 | 候选 ${textL1.candidates} 条 | 命中 ${textL1.hits} 条`);
+        }
    } else {
        lines.push(`    选入: ${chunkResults.length} 条`);
    }
@@ -988,9 +1007,6 @@ function formatRecallLog({
            lines.push(`    ${i + 1}. [${id}] ${title.padEnd(25)} sim=${sim} tRank=${tRank}`);
        });
    }
-    const entityBoostedEvents = eventResults.filter(e => e._entityBonus > 0).length;
-    lines.push('');
-    lines.push(`    实体加分事件: ${entityBoostedEvents} 条`);

    if (textGapInfo) {
        lines.push('');
@@ -1002,7 +1018,6 @@ function formatRecallLog({
        }
    }

-    // Causal
    if (causalEvents.length) {
        const maxRefs = Math.max(...causalEvents.map(c => c.chainFrom?.length || 0));
        const maxDepth = Math.max(...causalEvents.map(c => c.depth || 0));
@@ -1012,13 +1027,8 @@ function formatRecallLog({
    }

    lines.push('');
-    return lines.join('\n');
+    return lines.join("\n");
 }
-
-// ═══════════════════════════════════════════════════════════════════════════
-// 主入口
-// ═══════════════════════════════════════════════════════════════════════════
-
 export async function recallMemory(queryText, allEvents, vectorConfig, options = {}) {
    const T0 = performance.now();
    const { chat } = getContext();
@@ -1049,9 +1059,9 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
    const lexicon = buildEntityLexicon(store, allEvents);
    const queryEntityWeights = extractEntitiesWithWeights(segments, weights, lexicon);
    const queryEntities = Array.from(queryEntityWeights.keys());
+    const queryEntitySet = new Set(queryEntities.map(normalize));
    const facts = getFacts(store);
    const expandedTerms = expandByFacts(queryEntities, facts, 2);
-    const normalizedEntityWeights = normalizeEntityWeights(queryEntityWeights);

    let queryTextForSearch = '';
    let textQueryBreakdown = null;
@@ -1079,8 +1089,11 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
    }

    const [chunkResults, eventResults] = await Promise.all([
-        searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor),
-        searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, normalizedEntityWeights, l0FloorBonus),
+        searchChunks(queryVector, vectorConfig, l0FloorBonus, lastSummarizedFloor, {
+            queryEntities,
+            rareTerms: textQueryBreakdown?.rareTerms || [],
+        }),
+        searchEvents(queryVector, queryTextForSearch, allEvents, vectorConfig, store, queryEntitySet, l0FloorBonus),
    ]);

    const chunkPreFilterStats = chunkResults._preFilterStats || null;
@@ -1118,7 +1131,7 @@ export async function recallMemory(queryText, allEvents, vectorConfig, options =
        chunkResults: mergedChunks,
        eventResults,
        allEvents,
-        normalizedEntityWeights,
+        queryEntities,
        causalEvents: causalEventsTruncated,
        chunkPreFilterStats,
        l0Results,
@@ -1149,3 +1162,8 @@ export function buildQueryText(chat, count = 2, excludeLastAi = false) {
        return `${speaker}: ${text.slice(0, 500)}`;
    }).filter(Boolean).join('\n');
 }
+
+
+
+
+
--- a/modules/story-summary/vector/retrieval/text-search.js
+++ b/modules/story-summary/vector/retrieval/text-search.js
@@ -1,6 +1,6 @@
 // text-search.js - 最终版

-import MiniSearch from '../../../libs/minisearch.mjs';
+import MiniSearch from '../../../../libs/minisearch.mjs';

 const STOP_WORDS = new Set([
    '的', '了', '是', '在', '和', '与', '或', '但', '而', '却',
@@ -106,7 +106,7 @@ export function ensureEventTextIndex(events, revision) {
 *
 * 参考：帕累托法则（80/20 法则）在信息检索中的应用
 */
-function dynamicTopK(scores, coverage = 0.90, minK = 15, maxK = 80) {
+export function dynamicTopK(scores, coverage = 0.90, minK = 15, maxK = 80) {
    if (!scores.length) return 0;

    const total = scores.reduce((a, b) => a + b, 0);
@@ -171,3 +171,67 @@ export function clearEventTextIndex() {
    idx = null;
    lastRevision = null;
 }
+
+// ---------------------------------------------------------------------------
+// Chunk 文本索引（待整理区 L1 补充）
+// ---------------------------------------------------------------------------
+
+let chunkIdx = null;
+let chunkIdxRevision = null;
+
+export function ensureChunkTextIndex(chunks, revision) { // (Re)build the module-level chunk text index unless one already exists for this revision.
+    if (chunkIdx && revision === chunkIdxRevision) return; // Index is already current for this revision: nothing to do.
+
+    try {
+        chunkIdx = new MiniSearch({
+            fields: ['text'], // Only the chunk text is indexed for search.
+            storeFields: ['chunkId', 'floor'], // Read back from search hits by searchChunksByText.
+            tokenize,
+            searchOptions: { tokenize }, // Queries go through the same tokenizer as indexed text.
+        });
+
+        chunkIdx.addAll(chunks.map(c => ({
+            id: c.chunkId,
+            chunkId: c.chunkId,
+            floor: c.floor,
+            text: c.text || '', // A missing or empty text is indexed as an empty string.
+        })));
+
+        chunkIdxRevision = revision; // Recorded only after the build succeeded.
+    } catch (e) {
+        console.error('[text-search] Chunk index build failed:', e);
+        chunkIdx = null; // Drop the index on failure; the guard above then lets the next call retry the build.
+    }
+}
+
+export function searchChunksByText(query, floorMin, floorMax, limit = 20) { // Search the chunk text index and keep hits whose floor is within [floorMin, floorMax]; returns [] on failure.
+    if (!chunkIdx || !query?.trim()) return []; // No index available, or the query is missing/blank.
+
+    try {
+        const results = chunkIdx.search(query, { // Fuzzy and prefix matching are both disabled.
+            fuzzy: false,
+            prefix: false,
+        });
+
+        const filtered = results.filter(r => r.floor >= floorMin && r.floor <= floorMax); // Both floor bounds are inclusive.
+        if (!filtered.length) return [];
+
+        const scores = filtered.map(r => r.score);
+        const k = dynamicTopK(scores, 0.85, 5, limit); // Arguments are coverage = 0.85, minK = 5, maxK = limit.
+
+        return filtered.slice(0, k).map((r, i) => ({
+            chunkId: r.chunkId,
+            floor: r.floor,
+            textRank: i + 1, // 1-based position within the floor-filtered hits.
+            score: r.score,
+        }));
+    } catch (e) {
+        console.error('[text-search] Chunk search failed:', e);
+        return [];
+    }
+}
+
+export function clearChunkTextIndex() { // Discard the chunk text index and its revision marker.
+    chunkIdx = null; // While this is null, searchChunksByText returns [] and ensureChunkTextIndex rebuilds on its next call.
+    chunkIdxRevision = null;
+}
--- a/modules/story-summary/vector/retrieval/tokenizer.js
+++ b/modules/story-summary/vector/retrieval/tokenizer.js
@@ -1,5 +1,5 @@
-import { xbLog } from '../../../core/debug-core.js';
-import { extensionFolderPath } from '../../../core/constants.js';
+import { xbLog } from '../../../../core/debug-core.js';
+import { extensionFolderPath } from '../../../../core/constants.js';

 const MODULE_ID = 'tokenizer';

--- a/modules/story-summary/vector/storage/chunk-store.js
+++ b/modules/story-summary/vector/storage/chunk-store.js
@@ -8,7 +8,7 @@ import {
    chunkVectorsTable,
    eventVectorsTable,
    CHUNK_MAX_TOKENS,
-} from '../data/db.js';
+} from '../../data/db.js';

 // ═══════════════════════════════════════════════════════════════════════════
 // 工具函数
--- a/modules/story-summary/vector/storage/state-store.js
+++ b/modules/story-summary/vector/storage/state-store.js
@@ -4,11 +4,11 @@
 // StateVector 存 IndexedDB（可重建）
 // ═══════════════════════════════════════════════════════════════════════════

-import { saveMetadataDebounced } from '../../../../../../extensions.js';
-import { chat_metadata } from '../../../../../../../script.js';
-import { stateVectorsTable } from '../data/db.js';
-import { EXT_ID } from '../../../core/constants.js';
-import { xbLog } from '../../../core/debug-core.js';
+import { saveMetadataDebounced } from '../../../../../../../extensions.js';
+import { chat_metadata } from '../../../../../../../../script.js';
+import { stateVectorsTable } from '../../data/db.js';
+import { EXT_ID } from '../../../../core/constants.js';
+import { xbLog } from '../../../../core/debug-core.js';

 const MODULE_ID = 'state-store';

--- a/modules/story-summary/vector/storage/vector-io.js
+++ b/modules/story-summary/vector/storage/vector-io.js
@@ -3,9 +3,9 @@
 // 向量数据导入导出（当前 chatId 级别）
 // ═══════════════════════════════════════════════════════════════════════════

-import { zipSync, unzipSync, strToU8, strFromU8 } from '../../../libs/fflate.mjs';
-import { getContext } from '../../../../../../extensions.js';
-import { xbLog } from '../../../core/debug-core.js';
+import { zipSync, unzipSync, strToU8, strFromU8 } from '../../../../libs/fflate.mjs';
+import { getContext } from '../../../../../../../extensions.js';
+import { xbLog } from '../../../../core/debug-core.js';
 import {
    getMeta,
    updateMeta,
@@ -26,8 +26,8 @@ import {
    saveStateVectors,
    clearStateVectors,
 } from './state-store.js';
-import { getEngineFingerprint } from './embedder.js';
-import { getVectorConfig } from '../data/config.js';
+import { getEngineFingerprint } from '../utils/embedder.js';
+import { getVectorConfig } from '../../data/config.js';

 const MODULE_ID = 'vector-io';
 const EXPORT_VERSION = 1;
--- a/modules/story-summary/vector/utils/embedder.js
+++ b/modules/story-summary/vector/utils/embedder.js
@@ -3,7 +3,7 @@
 // 统一的向量生成接口（本地模型 / 在线服务）
 // ═══════════════════════════════════════════════════════════════════════════

-import { xbLog } from '../../../core/debug-core.js';
+import { xbLog } from '../../../../core/debug-core.js';

 const MODULE_ID = 'embedding';

--- a/modules/story-summary/vector/utils/embedder.worker.js
+++ b/modules/story-summary/vector/utils/embedder.worker.js
--- a/modules/story-summary/vector/utils/text-filter.js
+++ b/modules/story-summary/vector/utils/text-filter.js
@@ -3,7 +3,7 @@
 // 跳过用户定义的「起始→结束」区间
 // ═══════════════════════════════════════════════════════════════════════════

-import { getTextFilterRules } from '../data/config.js';
+import { getTextFilterRules } from '../../data/config.js';

 /**
 * 转义正则特殊字符