2.0变量 , 向量总结正式推送

2026-02-16 00:30:59 +08:00
parent 17b1fe9091
commit cd9fe53f84
75 changed files with 48287 additions and 12186 deletions
--- a/modules/story-summary/vector/llm/atom-extraction.js
+++ b/modules/story-summary/vector/llm/atom-extraction.js
@@ -0,0 +1,376 @@
+// ============================================================================
+// atom-extraction.js - L0 场景锚点提取（v2 - 场景摘要 + 图结构）
+//
+// 设计依据：
+// - BGE-M3 (BAAI, 2024): 自然语言段落检索精度最高 → semantic = 纯自然语言
+// - TransE (Bordes, 2013): s/t/r 三元组方向性 → edges 格式
+//
+// 每楼层 1-2 个场景锚点（非碎片原子），60-100 字场景摘要
+// ============================================================================
+
+import { callLLM, parseJson } from './llm-service.js';
+import { xbLog } from '../../../../core/debug-core.js';
+import { filterText } from '../utils/text-filter.js';
+
+// Tag passed as the first argument of every xbLog call in this module.
+const MODULE_ID = 'atom-extraction';
+
+// Number of chat rounds handed to the LLM per batch in batchExtractAtoms.
+const CONCURRENCY = 10;
+// Extra attempts after the first one (the retry loop runs RETRY_COUNT + 1 times).
+const RETRY_COUNT = 2;
+// Base delay in ms between attempts; exception retries scale it by the attempt number.
+const RETRY_DELAY = 500;
+// Timeout value in ms forwarded to callLLM through its options object.
+const DEFAULT_TIMEOUT = 20000;
+// Step in ms used to stagger request start times inside the first batch.
+const STAGGER_DELAY = 80;
+
+// Module-level cancellation flag: set by cancelBatchExtraction, reset by batchExtractAtoms.
+let batchCancelled = false;
+
+/**
+ * Request cancellation of extraction by setting the module-level flag.
+ * The flag is polled by batchExtractAtoms and by the per-round retry loop.
+ */
+export function cancelBatchExtraction() {
+    batchCancelled = true;
+}
+
+/**
+ * Report whether cancellation has been requested.
+ * @returns {boolean} current value of the module-level cancellation flag
+ */
+export function isBatchCancelled() {
+    return batchCancelled;
+}
+
+// ============================================================================
+// L0 提取 Prompt
+// ============================================================================
+
+// System prompt sent with every extraction request. It asks the model to return
+// 1-2 "scene anchors" per round as strict JSON: a natural-language `scene`,
+// 1-3 `edges` triples (s = actor, t = target, r = action phrase) and a `where`
+// string. The text below is runtime data sent to the model, not documentation.
+const SYSTEM_PROMPT = `你是场景摘要器。从一轮对话中提取1-2个场景锚点，用于语义检索和关系追踪。
+
+输入格式：
+<round>
+  <user name="用户名">...</user>
+  <assistant>...</assistant>
+</round>
+
+只输出严格JSON：
+{"anchors":[
+  {
+    "scene": "60-100字完整场景描述",
+    "edges": [{"s":"施事方","t":"受事方","r":"互动行为"}],
+    "where": "地点"
+  }
+]}
+
+## scene 写法
+- 纯自然语言，像旁白或日记，不要任何标签/标记/枚举值
+- 必须包含：角色名、动作、情感氛围、关键细节
+- 读者只看 scene 就能复原这一幕
+- 60-100字，信息密集但流畅
+
+## edges（关系三元组）
+- s=施事方 t=受事方 r=互动行为（建议 6-12 字，最多 20 字）
+- s/t 必须是参与互动的角色正式名称，不用代词或别称
+- 只从正文内容中识别角色名，不要把标签名（如 user、assistant）当作角色
+- r 使用动作模板短语：“动作+对象/结果”（例：“提出交易条件”、“拒绝对方请求”、“当众揭露秘密”、“安抚对方情绪”）
+- r 不要写人名，不要复述整句，不要写心理描写或评价词
+- r 正例（合格）：提出交易条件、拒绝对方请求、当众揭露秘密、安抚对方情绪、强行打断发言、转移谈话焦点
+- r 反例（不合格）：我觉得她现在很害怕、他突然非常生气地大喊起来、user开始说话、assistant解释了很多细节
+- 每个锚点 1-3 条
+
+## where
+- 场景地点，无明确地点时空字符串
+
+## 数量规则
+- 最多2个。1个够时不凑2个
+- 明显场景切换（地点/时间/对象变化）时才2个
+- 同一场景不拆分
+- 无角色互动时返回 {"anchors":[]}
+
+## 示例
+输入：艾拉在火山口举起圣剑刺穿古龙心脏，龙血溅满她的铠甲，她跪倒在地痛哭
+输出：
+{"anchors":[{"scene":"火山口上艾拉举起圣剑刺穿古龙的心脏，龙血溅满铠甲，古龙轰然倒地，艾拉跪倒在滚烫的岩石上痛哭，完成了她不得不做的弑杀","edges":[{"s":"艾拉","t":"古龙","r":"以圣剑刺穿心脏"}],"where":"火山口"}]}`;
+
+// Assistant prefill: sent as the trailing assistant message so the reply
+// continues after this prefix; it is prepended to the raw reply before parsing.
+const JSON_PREFILL = '{"anchors":[';
+
+// ============================================================================
+// 睡眠工具
+// ============================================================================
+
+const sleep = (ms) => new Promise(r => setTimeout(r, ms));
+
+// Adverbs and connectives removed from an edge's action phrase by
+// sanitizeActionPhrase (every occurrence is deleted).
+const ACTION_STRIP_WORDS = [
+    '突然', '非常', '有些', '有点', '轻轻', '悄悄', '缓缓', '立刻',
+    '马上', '然后', '并且', '而且', '开始', '继续', '再次', '正在',
+];
+
+function clamp(v, min, max) {
+    return Math.max(min, Math.min(max, v));
+}
+
+function sanitizeActionPhrase(raw) {
+    let text = String(raw || '')
+        .normalize('NFKC')
+        .replace(/[\u200B-\u200D\uFEFF]/g, '')
+        .trim();
+    if (!text) return '';
+
+    text = text
+        .replace(/[，。！？、；：,.!?;:"'“”‘’()（）[\]{}<>《》]/g, '')
+        .replace(/\s+/g, '');
+
+    for (const word of ACTION_STRIP_WORDS) {
+        text = text.replaceAll(word, '');
+    }
+
+    text = text.replace(/(地|得|了|着|过)+$/g, '');
+
+    if (text.length < 2) return '';
+    if (text.length > 12) text = text.slice(0, 12);
+    return text;
+}
+
+function calcAtomQuality(scene, edges, where) {
+    const sceneLen = String(scene || '').length;
+    const sceneScore = clamp(sceneLen / 80, 0, 1);
+    const edgeScore = clamp((edges?.length || 0) / 3, 0, 1);
+    const whereScore = where ? 1 : 0;
+    const quality = 0.55 * sceneScore + 0.35 * edgeScore + 0.10 * whereScore;
+    return Number(quality.toFixed(3));
+}
+
+// ============================================================================
+// 清洗与构建
+// ============================================================================
+
+/**
+ * 清洗 edges 三元组
+ * @param {object[]} raw
+ * @returns {object[]}
+ */
+function sanitizeEdges(raw) {
+    if (!Array.isArray(raw)) return [];
+    return raw
+        .filter(e => e && typeof e === 'object')
+        .map(e => ({
+            s: String(e.s || '').trim(),
+            t: String(e.t || '').trim(),
+            r: sanitizeActionPhrase(e.r),
+        }))
+        .filter(e => e.s && e.t && e.r)
+        .slice(0, 3);
+}
+
+/**
+ * 将解析后的 anchor 转换为 atom 存储对象
+ *
+ * semantic = scene（纯自然语言，直接用于 embedding）
+ *
+ * @param {object} anchor - LLM 输出的 anchor 对象
+ * @param {number} aiFloor - AI 消息楼层号
+ * @param {number} idx - 同楼层序号（0 或 1）
+ * @returns {object|null} atom 对象
+ */
+function anchorToAtom(anchor, aiFloor, idx) {
+    const scene = String(anchor.scene || '').trim();
+    if (!scene) return null;
+
+    // scene 过短（< 15 字）可能是噪音
+    if (scene.length < 15) return null;
+    const edges = sanitizeEdges(anchor.edges);
+    const where = String(anchor.where || '').trim();
+    const quality = calcAtomQuality(scene, edges, where);
+
+    return {
+        atomId: `atom-${aiFloor}-${idx}`,
+        floor: aiFloor,
+        source: 'ai',
+
+        // ═══ 检索层（embedding 的唯一入口） ═══
+        semantic: scene,
+
+        // ═══ 图结构层（扩散的 key） ═══
+        edges,
+        where,
+        quality,
+    };
+}
+
+// ============================================================================
+// 单轮提取（带重试）
+// ============================================================================
+
+async function extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, options = {}) {
+    const { timeout = DEFAULT_TIMEOUT } = options;
+
+    if (!aiMessage?.mes?.trim()) return [];
+
+    const parts = [];
+    const userName = userMessage?.name || '用户';
+
+    if (userMessage?.mes?.trim()) {
+        const userText = filterText(userMessage.mes);
+        parts.push(`<user name="${userName}">\n${userText}\n</user>`);
+    }
+
+    const aiText = filterText(aiMessage.mes);
+    parts.push(`<assistant>\n${aiText}\n</assistant>`);
+
+    const input = `<round>\n${parts.join('\n')}\n</round>`;
+
+    for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) {
+        if (batchCancelled) return [];
+
+        try {
+            const response = await callLLM([
+                { role: 'system', content: SYSTEM_PROMPT },
+                { role: 'user', content: input },
+                { role: 'assistant', content: JSON_PREFILL },
+            ], {
+                temperature: 0.3,
+                max_tokens: 600,
+                timeout,
+            });
+
+            const rawText = String(response || '');
+            if (!rawText.trim()) {
+                if (attempt < RETRY_COUNT) {
+                    await sleep(RETRY_DELAY);
+                    continue;
+                }
+                return null;
+            }
+
+            const fullJson = JSON_PREFILL + rawText;
+
+            let parsed;
+            try {
+                parsed = parseJson(fullJson);
+            } catch (e) {
+                xbLog.warn(MODULE_ID, `floor ${aiFloor} JSON解析失败 (attempt ${attempt})`);
+                if (attempt < RETRY_COUNT) {
+                    await sleep(RETRY_DELAY);
+                    continue;
+                }
+                return null;
+            }
+
+            // 兼容：优先 anchors，回退 atoms
+            const rawAnchors = parsed?.anchors;
+            if (!rawAnchors || !Array.isArray(rawAnchors)) {
+                if (attempt < RETRY_COUNT) {
+                    await sleep(RETRY_DELAY);
+                    continue;
+                }
+                return null;
+            }
+
+            // 转换为 atom 存储格式（最多 2 个）
+            const atoms = rawAnchors
+                .slice(0, 2)
+                .map((a, idx) => anchorToAtom(a, aiFloor, idx))
+                .filter(Boolean);
+
+            return atoms;
+
+        } catch (e) {
+            if (batchCancelled) return null;
+
+            if (attempt < RETRY_COUNT) {
+                await sleep(RETRY_DELAY * (attempt + 1));
+                continue;
+            }
+            xbLog.error(MODULE_ID, `floor ${aiFloor} 失败`, e);
+            return null;
+        }
+    }
+
+    return null;
+}
+
+/**
+ * Public entry point for extracting atoms from a single round; forwards all
+ * arguments unchanged to extractAtomsForRoundWithRetry and returns its result.
+ *
+ * @param {object|null} userMessage
+ * @param {object} aiMessage
+ * @param {number} aiFloor
+ * @param {object} [options]
+ */
+export async function extractAtomsForRound(userMessage, aiMessage, aiFloor, options = {}) {
+    return extractAtomsForRoundWithRetry(userMessage, aiMessage, aiFloor, options);
+}
+
+// ============================================================================
+// 批量提取
+// ============================================================================
+
+export async function batchExtractAtoms(chat, onProgress) {
+    if (!chat?.length) return [];
+
+    batchCancelled = false;
+
+    const pairs = [];
+    for (let i = 0; i < chat.length; i++) {
+        if (!chat[i].is_user) {
+            const userMsg = (i > 0 && chat[i - 1]?.is_user) ? chat[i - 1] : null;
+            pairs.push({ userMsg, aiMsg: chat[i], aiFloor: i });
+        }
+    }
+
+    if (!pairs.length) return [];
+
+    const allAtoms = [];
+    let completed = 0;
+    let failed = 0;
+
+    for (let i = 0; i < pairs.length; i += CONCURRENCY) {
+        if (batchCancelled) break;
+
+        const batch = pairs.slice(i, i + CONCURRENCY);
+
+        if (i === 0) {
+            const promises = batch.map((pair, idx) => (async () => {
+                await sleep(idx * STAGGER_DELAY);
+
+                if (batchCancelled) return;
+
+                try {
+                    const atoms = await extractAtomsForRoundWithRetry(
+                        pair.userMsg,
+                        pair.aiMsg,
+                        pair.aiFloor,
+                        { timeout: DEFAULT_TIMEOUT }
+                    );
+                    if (atoms?.length) {
+                        allAtoms.push(...atoms);
+                    } else if (atoms === null) {
+                        failed++;
+                    }
+                } catch {
+                    failed++;
+                }
+                completed++;
+                onProgress?.(completed, pairs.length, failed);
+            })());
+            await Promise.all(promises);
+        } else {
+            const promises = batch.map(pair =>
+                extractAtomsForRoundWithRetry(
+                    pair.userMsg,
+                    pair.aiMsg,
+                    pair.aiFloor,
+                    { timeout: DEFAULT_TIMEOUT }
+                )
+                    .then(atoms => {
+                        if (batchCancelled) return;
+                        if (atoms?.length) {
+                            allAtoms.push(...atoms);
+                        } else if (atoms === null) {
+                            failed++;
+                        }
+                        completed++;
+                        onProgress?.(completed, pairs.length, failed);
+                    })
+                    .catch(() => {
+                        if (batchCancelled) return;
+                        failed++;
+                        completed++;
+                        onProgress?.(completed, pairs.length, failed);
+                    })
+            );
+
+            await Promise.all(promises);
+        }
+
+        if (i + CONCURRENCY < pairs.length && !batchCancelled) {
+            await sleep(30);
+        }
+    }
+
+    xbLog.info(MODULE_ID, `批量提取完成: ${allAtoms.length} atoms, ${failed} 失败`);
+
+    return allAtoms;
+}
+
--- a/modules/story-summary/vector/llm/llm-service.js
+++ b/modules/story-summary/vector/llm/llm-service.js
@@ -0,0 +1,99 @@
+// ═══════════════════════════════════════════════════════════════════════════
+// vector/llm/llm-service.js - 修复 prefill 传递方式
+// ═══════════════════════════════════════════════════════════════════════════
+import { xbLog } from '../../../../core/debug-core.js';
+import { getVectorConfig } from '../../data/config.js';
+
+// Tag passed as the first argument of xbLog calls in this module.
+const MODULE_ID = 'vector-llm-service';
+// Base URL passed to the generation command as `apiurl`.
+const SILICONFLOW_API_URL = 'https://api.siliconflow.cn/v1';
+// Model name passed to the generation command as `model`.
+const DEFAULT_L0_MODEL = 'Qwen/Qwen3-8B';
+
+// Rolling counter used by generateUniqueId (wraps at 100000).
+let callCounter = 0;
+
+function getStreamingModule() {
+    const mod = window.xiaobaixStreamingGeneration;
+    return mod?.xbgenrawCommand ? mod : null;
+}
+
+function generateUniqueId(prefix = 'llm') {
+    callCounter = (callCounter + 1) % 100000;
+    return `${prefix}-${callCounter}-${Date.now().toString(36)}`;
+}
+
+function b64UrlEncode(str) {
+    const utf8 = new TextEncoder().encode(String(str));
+    let bin = '';
+    utf8.forEach(b => bin += String.fromCharCode(b));
+    return btoa(bin).replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, '');
+}
+
+/**
+ * 统一LLM调用 - 走酒馆后端（非流式）
+ * assistant prefill 用 bottomassistant 参数传递
+ */
+export async function callLLM(messages, options = {}) {
+    const {
+        temperature = 0.2,
+        max_tokens = 500,
+    } = options;
+
+    const mod = getStreamingModule();
+    if (!mod) throw new Error('Streaming module not ready');
+
+    const cfg = getVectorConfig();
+    const apiKey = cfg?.online?.key || '';
+    if (!apiKey) {
+        throw new Error('L0 requires siliconflow API key');
+    }
+
+    // 分离 assistant prefill
+    let topMessages = [...messages];
+    let assistantPrefill = '';
+
+    if (topMessages.length > 0 && topMessages[topMessages.length - 1]?.role === 'assistant') {
+        const lastMsg = topMessages.pop();
+        assistantPrefill = lastMsg.content || '';
+    }
+
+    const top64 = b64UrlEncode(JSON.stringify(topMessages));
+    const uniqueId = generateUniqueId('l0');
+
+    const args = {
+        as: 'user',
+        nonstream: 'true',
+        top64,
+        id: uniqueId,
+        temperature: String(temperature),
+        max_tokens: String(max_tokens),
+        api: 'openai',
+        apiurl: SILICONFLOW_API_URL,
+        apipassword: apiKey,
+        model: DEFAULT_L0_MODEL,
+    };
+    const isQwen3 = String(DEFAULT_L0_MODEL || '').includes('Qwen3');
+    if (isQwen3) {
+        args.enable_thinking = 'false';
+    }
+
+    // ★ 用 bottomassistant 参数传递 prefill
+    if (assistantPrefill) {
+        args.bottomassistant = assistantPrefill;
+    }
+
+    try {
+        const result = await mod.xbgenrawCommand(args, '');
+        return String(result ?? '');
+    } catch (e) {
+        xbLog.error(MODULE_ID, 'LLM调用失败', e);
+        throw e;
+    }
+}
+
+export function parseJson(text) {
+    if (!text) return null;
+    let s = text.trim().replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/i, '').trim();
+    try { return JSON.parse(s); } catch { }
+    const i = s.indexOf('{'), j = s.lastIndexOf('}');
+    if (i !== -1 && j > i) try { return JSON.parse(s.slice(i, j + 1)); } catch { }
+    return null;
+}
--- a/modules/story-summary/vector/llm/reranker.js
+++ b/modules/story-summary/vector/llm/reranker.js
@@ -0,0 +1,266 @@
+// ═══════════════════════════════════════════════════════════════════════════
+// Reranker - 硅基 bge-reranker-v2-m3
+// 对候选文档进行精排，过滤与 query 不相关的内容
+// ═══════════════════════════════════════════════════════════════════════════
+
+import { xbLog } from '../../../../core/debug-core.js';
+import { getApiKey } from './siliconflow.js';
+
+// Tag passed as the first argument of xbLog calls in this module.
+const MODULE_ID = 'reranker';
+// Endpoint and model used for every rerank request.
+const RERANK_URL = 'https://api.siliconflow.cn/v1/rerank';
+const RERANK_MODEL = 'BAAI/bge-reranker-v2-m3';
+// Default request timeout in ms for rerank().
+const DEFAULT_TIMEOUT = 15000;
+// rerank() truncates its document list to this many entries (API limit).
+const MAX_DOCUMENTS = 100;
+// rerankChunks() splits larger inputs into batches of this size ...
+const RERANK_BATCH_SIZE = 20;
+// ... and runs at most this many batches at the same time.
+const RERANK_MAX_CONCURRENCY = 5;
+
+/**
+ * 对文档列表进行 Rerank 精排
+ * 
+ * @param {string} query - 查询文本
+ * @param {Array<string>} documents - 文档文本列表
+ * @param {object} options - 选项
+ * @param {number} options.topN - 返回前 N 个结果，默认 40
+ * @param {number} options.timeout - 超时时间，默认 15000ms
+ * @param {AbortSignal} options.signal - 取消信号
+ * @returns {Promise<Array<{index: number, relevance_score: number}>>} 排序后的结果
+ */
+export async function rerank(query, documents, options = {}) {
+    const { topN = 40, timeout = DEFAULT_TIMEOUT, signal } = options;
+
+    if (!query?.trim()) {
+        xbLog.warn(MODULE_ID, 'query 为空，跳过 rerank');
+        return { results: documents.map((_, i) => ({ index: i, relevance_score: 0 })), failed: true };
+    }
+
+    if (!documents?.length) {
+        return { results: [], failed: false };
+    }
+
+    const key = getApiKey();
+    if (!key) {
+        xbLog.warn(MODULE_ID, '未配置 API Key，跳过 rerank');
+        return { results: documents.map((_, i) => ({ index: i, relevance_score: 0 })), failed: true };
+    }
+
+    // 截断超长文档列表
+    const truncatedDocs = documents.slice(0, MAX_DOCUMENTS);
+    if (documents.length > MAX_DOCUMENTS) {
+        xbLog.warn(MODULE_ID, `文档数 ${documents.length} 超过限制 ${MAX_DOCUMENTS}，已截断`);
+    }
+
+    // 过滤空文档，记录原始索引
+    const validDocs = [];
+    const indexMap = [];  // validDocs index → original index
+    
+    for (let i = 0; i < truncatedDocs.length; i++) {
+        const text = String(truncatedDocs[i] || '').trim();
+        if (text) {
+            validDocs.push(text);
+            indexMap.push(i);
+        }
+    }
+
+    if (!validDocs.length) {
+        xbLog.warn(MODULE_ID, '无有效文档，跳过 rerank');
+        return { results: [], failed: false };
+    }
+
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), timeout);
+
+    try {
+        const T0 = performance.now();
+
+        const response = await fetch(RERANK_URL, {
+            method: 'POST',
+            headers: {
+                'Authorization': `Bearer ${key}`,
+                'Content-Type': 'application/json',
+            },
+            body: JSON.stringify({
+                model: RERANK_MODEL,
+                // Zero-darkbox: do not silently truncate query.
+                query,
+                documents: validDocs,
+                top_n: Math.min(topN, validDocs.length),
+                return_documents: false,
+            }),
+            signal: signal || controller.signal,
+        });
+
+        clearTimeout(timeoutId);
+
+        if (!response.ok) {
+            const errorText = await response.text().catch(() => '');
+            throw new Error(`Rerank API ${response.status}: ${errorText.slice(0, 200)}`);
+        }
+
+        const data = await response.json();
+        const results = data.results || [];
+
+        // 映射回原始索引
+        const mapped = results.map(r => ({
+            index: indexMap[r.index],
+            relevance_score: r.relevance_score ?? 0,
+        }));
+
+        const elapsed = Math.round(performance.now() - T0);
+        xbLog.info(MODULE_ID, `Rerank 完成: ${validDocs.length} docs → ${results.length} selected (${elapsed}ms)`);
+
+        return { results: mapped, failed: false };
+
+    } catch (e) {
+        clearTimeout(timeoutId);
+
+        if (e?.name === 'AbortError') {
+            xbLog.warn(MODULE_ID, 'Rerank 超时或取消');
+        } else {
+            xbLog.error(MODULE_ID, 'Rerank 失败', e);
+        }
+
+        // 降级：返回原顺序，分数均匀分布
+        return {
+            results: documents.slice(0, topN).map((_, i) => ({
+                index: i,
+                relevance_score: 0,
+            })),
+            failed: true,
+        };
+    }
+}
+
+/**
+ * 对 chunk 对象列表进行 Rerank
+ * 
+ * @param {string} query - 查询文本
+ * @param {Array<object>} chunks - chunk 对象列表，需要有 text 字段
+ * @param {object} options - 选项
+ * @returns {Promise<Array<object>>} 排序后的 chunk 列表，带 _rerankScore 字段
+ */
+export async function rerankChunks(query, chunks, options = {}) {
+    const { topN = 40, minScore = 0.1 } = options;
+
+    if (!chunks?.length) return [];
+
+    const texts = chunks.map(c => c.text || c.semantic || '');
+
+    // ─── 单批：直接调用 ───
+    if (texts.length <= RERANK_BATCH_SIZE) {
+        const { results, failed } = await rerank(query, texts, {
+            topN: Math.min(topN, texts.length),
+            timeout: options.timeout,
+            signal: options.signal,
+        });
+
+        if (failed) {
+            return chunks.map(c => ({ ...c, _rerankScore: 0, _rerankFailed: true }));
+        }
+
+        return results
+            .filter(r => r.relevance_score >= minScore)
+            .sort((a, b) => b.relevance_score - a.relevance_score)
+            .slice(0, topN)
+            .map(r => ({
+                ...chunks[r.index],
+                _rerankScore: r.relevance_score,
+            }));
+    }
+
+    // ─── 多批：拆分 → 并发 → 合并 ───
+    const batches = [];
+    for (let i = 0; i < texts.length; i += RERANK_BATCH_SIZE) {
+        batches.push({
+            texts: texts.slice(i, i + RERANK_BATCH_SIZE),
+            offset: i,
+        });
+    }
+
+    const concurrency = Math.min(batches.length, RERANK_MAX_CONCURRENCY);
+    xbLog.info(MODULE_ID, `并发 Rerank: ${batches.length} 批 × ≤${RERANK_BATCH_SIZE} docs, concurrency=${concurrency}`);
+
+    const batchResults = new Array(batches.length);
+    let failedBatches = 0;
+
+    const runBatch = async (batchIdx) => {
+        const batch = batches[batchIdx];
+        const { results, failed } = await rerank(query, batch.texts, {
+            topN: batch.texts.length,
+            timeout: options.timeout,
+            signal: options.signal,
+        });
+
+        if (failed) {
+            failedBatches++;
+            // 单批降级：保留原始顺序，score=0
+            batchResults[batchIdx] = batch.texts.map((_, i) => ({
+                globalIndex: batch.offset + i,
+                relevance_score: 0,
+                _batchFailed: true,
+            }));
+        } else {
+            batchResults[batchIdx] = results.map(r => ({
+                globalIndex: batch.offset + r.index,
+                relevance_score: r.relevance_score,
+            }));
+        }
+    };
+
+    // 并发池
+    let nextIdx = 0;
+    const worker = async () => {
+        while (nextIdx < batches.length) {
+            const idx = nextIdx++;
+            await runBatch(idx);
+        }
+    };
+    await Promise.all(Array.from({ length: concurrency }, () => worker()));
+
+    // 全部失败 → 整体降级
+    if (failedBatches === batches.length) {
+        xbLog.warn(MODULE_ID, `全部 ${batches.length} 批 rerank 失败，整体降级`);
+        return chunks.slice(0, topN).map(c => ({
+            ...c,
+            _rerankScore: 0,
+            _rerankFailed: true,
+        }));
+    }
+
+    // 合并所有批次结果
+    const merged = batchResults.flat();
+
+    const selected = merged
+        .filter(r => r._batchFailed || r.relevance_score >= minScore)
+        .sort((a, b) => b.relevance_score - a.relevance_score)
+        .slice(0, topN)
+        .map(r => ({
+            ...chunks[r.globalIndex],
+            _rerankScore: r.relevance_score,
+            ...(r._batchFailed ? { _rerankFailed: true } : {}),
+        }));
+
+    xbLog.info(MODULE_ID,
+        `Rerank 合并: ${merged.length} candidates, ${failedBatches}/${batches.length} 批失败, 选中 ${selected.length}`
+    );
+
+    return selected;
+}
+/**
+ * 测试 Rerank 服务连接
+ */
+export async function testRerankService() {
+    const key = getApiKey();
+    if (!key) {
+        throw new Error('请配置硅基 API Key');
+    }
+
+    try {
+        const { results } = await rerank('测试查询', ['测试文档1', '测试文档2'], { topN: 2 });
+        return { 
+            success: true, 
+            message: `连接成功，返回 ${results.length} 个结果`,
+        };
+    } catch (e) {
+        throw new Error(`连接失败: ${e.message}`);
+    }
+}
--- a/modules/story-summary/vector/llm/siliconflow.js
+++ b/modules/story-summary/vector/llm/siliconflow.js
@@ -0,0 +1,59 @@
+// ═══════════════════════════════════════════════════════════════════════════
+// siliconflow.js - 仅保留 Embedding
+// ═══════════════════════════════════════════════════════════════════════════
+
+// API origin; embed() appends `/v1/embeddings` to it.
+const BASE_URL = 'https://api.siliconflow.cn';
+// Model name sent with every embedding request (also exported as MODELS).
+const EMBEDDING_MODEL = 'BAAI/bge-m3';
+
+export function getApiKey() {
+    try {
+        const raw = localStorage.getItem('summary_panel_config');
+        if (raw) {
+            const parsed = JSON.parse(raw);
+            return parsed.vector?.online?.key || null;
+        }
+    } catch { }
+    return null;
+}
+
+export async function embed(texts, options = {}) {
+    if (!texts?.length) return [];
+
+    const key = getApiKey();
+    if (!key) throw new Error('未配置硅基 API Key');
+
+    const { timeout = 30000, signal } = options;
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), timeout);
+
+    try {
+        const response = await fetch(`${BASE_URL}/v1/embeddings`, {
+            method: 'POST',
+            headers: {
+                'Authorization': `Bearer ${key}`,
+                'Content-Type': 'application/json',
+            },
+            body: JSON.stringify({
+                model: EMBEDDING_MODEL,
+                input: texts,
+            }),
+            signal: signal || controller.signal,
+        });
+
+        clearTimeout(timeoutId);
+
+        if (!response.ok) {
+            const errorText = await response.text().catch(() => '');
+            throw new Error(`Embedding ${response.status}: ${errorText.slice(0, 200)}`);
+        }
+
+        const data = await response.json();
+        return (data.data || [])
+            .sort((a, b) => a.index - b.index)
+            .map(item => Array.isArray(item.embedding) ? item.embedding : Array.from(item.embedding));
+    } finally {
+        clearTimeout(timeoutId);
+    }
+}
+
+export { EMBEDDING_MODEL as MODELS };