LittleWhiteBox/modules/story-summary/vector/pipeline/state-recall.js

// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - State Recall (L0)
// L0 语义锚点召回 + floor bonus + 虚拟 chunk 转换
// ═══════════════════════════════════════════════════════════════════════════

import { getContext } from '../../../../../../../extensions.js';
import { getAllStateVectors, getStateAtoms } from '../storage/state-store.js';
import { getMeta } from '../storage/chunk-store.js';
import { getEngineFingerprint } from '../utils/embedder.js';
import { xbLog } from '../../../../core/debug-core.js';

const MODULE_ID = 'state-recall';

const CONFIG = {
    MAX_RESULTS: 20,
    MIN_SIMILARITY: 0.55,
};

// ═══════════════════════════════════════════════════════════════════════════
// 工具函数
// ═══════════════════════════════════════════════════════════════════════════

function cosineSimilarity(a, b) {
    if (!a?.length || !b?.length || a.length !== b.length) return 0;
    let dot = 0, nA = 0, nB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        nA += a[i] * a[i];
        nB += b[i] * b[i];
    }
    return nA && nB ? dot / (Math.sqrt(nA) * Math.sqrt(nB)) : 0;
}

// ═══════════════════════════════════════════════════════════════════════════
// L0 向量检索
// ═══════════════════════════════════════════════════════════════════════════

/**
 * 检索与 query 相似的 StateAtoms
 * @returns {Array<{atom, similarity}>}
 */
export async function searchStateAtoms(queryVector, vectorConfig) {
    const { chatId } = getContext();
    if (!chatId || !queryVector?.length) return [];

    // 检查 fingerprint
    const meta = await getMeta(chatId);
    const fp = getEngineFingerprint(vectorConfig);
    if (meta.fingerprint && meta.fingerprint !== fp) {
        xbLog.warn(MODULE_ID, 'fingerprint 不匹配，跳过 L0 召回');
        return [];
    }

    // 获取向量
    const stateVectors = await getAllStateVectors(chatId);
    if (!stateVectors.length) return [];

    // 获取 atoms（用于关联 semantic 等字段）
    const atoms = getStateAtoms();
    const atomMap = new Map(atoms.map(a => [a.atomId, a]));

    // 计算相似度
    const scored = stateVectors
        .map(sv => {
            const atom = atomMap.get(sv.atomId);
            if (!atom) return null;

            return {
                atomId: sv.atomId,
                floor: sv.floor,
                similarity: cosineSimilarity(queryVector, sv.vector),
                atom,
            };
        })
        .filter(Boolean)
        .filter(s => s.similarity >= CONFIG.MIN_SIMILARITY)
        .sort((a, b) => b.similarity - a.similarity)
        .slice(0, CONFIG.MAX_RESULTS);

    return scored;
}

// ═══════════════════════════════════════════════════════════════════════════
// Floor Bonus 构建
// ═══════════════════════════════════════════════════════════════════════════

/**
 * 构建 L0 相关楼层的加权映射
 * @returns {Map<number, number>}
 */
export function buildL0FloorBonus(l0Results, bonusFactor = 0.10) {
    const floorBonus = new Map();

    for (const r of l0Results || []) {
        // 每个楼层只加一次，取最高相似度对应的 bonus
        // 简化处理：统一加 bonusFactor，不区分相似度高低
        if (!floorBonus.has(r.floor)) {
            floorBonus.set(r.floor, bonusFactor);
        }
    }

    return floorBonus;
}

// ═══════════════════════════════════════════════════════════════════════════
// 虚拟 Chunk 转换
// ═══════════════════════════════════════════════════════════════════════════

/**
 * 将 L0 结果转换为虚拟 chunk 格式
 * 用于和 L1 chunks 统一处理
 */
export function stateToVirtualChunks(l0Results) {
    return (l0Results || []).map(r => ({
        chunkId: `state-${r.atomId}`,
        floor: r.floor,
        chunkIdx: -1,           // 负值，排序时排在 L1 前面
        speaker: '📌',          // 固定标记
        isUser: false,
        text: r.atom.semantic,
        textHash: null,
        similarity: r.similarity,
        isL0: true,             // 标记字段
        // 保留原始 atom 信息
        _atom: r.atom,
    }));
}

// ═══════════════════════════════════════════════════════════════════════════
// 每楼层稀疏去重
// ═══════════════════════════════════════════════════════════════════════════

/**
 * 合并 L0 和 L1 chunks
 * @param {Array} l0Chunks - L0 虚拟 chunks（带 similarity）
 * @param {Array} l1Chunks - L1 真实 chunks（无 similarity）
 * @param {number} limit - 每楼层上限
 * @returns {Array} 合并后的 chunks
 */
export function mergeAndSparsify(l0Chunks, l1Chunks, limit = 2) {
    // 构建 L0 楼层 → 最高 similarity 映射
    const floorSimilarity = new Map();
    for (const c of (l0Chunks || [])) {
        const existing = floorSimilarity.get(c.floor) || 0;
        if ((c.similarity || 0) > existing) {
            floorSimilarity.set(c.floor, c.similarity || 0);
        }
    }

    // L1 继承所属楼层的 L0 similarity
    const l1WithScore = (l1Chunks || []).map(c => ({
        ...c,
        similarity: floorSimilarity.get(c.floor) || 0.5,
    }));

    // 合并并按相似度排序
    const all = [...(l0Chunks || []), ...l1WithScore]
        .sort((a, b) => {
            // 相似度优先
            const simDiff = (b.similarity || 0) - (a.similarity || 0);
            if (Math.abs(simDiff) > 0.01) return simDiff;

            // 同楼层：L0 优先于 L1
            if (a.floor === b.floor) {
                if (a.isL0 && !b.isL0) return -1;
                if (!a.isL0 && b.isL0) return 1;
            }

            // 按楼层升序
            return a.floor - b.floor;
        });

    // 每楼层稀疏去重
    const byFloor = new Map();

    for (const c of all) {
        const arr = byFloor.get(c.floor) || [];
        if (arr.length < limit) {
            arr.push(c);
            byFloor.set(c.floor, arr);
        }
    }

    // 扁平化并保持排序
    return Array.from(byFloor.values())
        .flat()
        .sort((a, b) => (b.similarity || 0) - (a.similarity || 0));
}