feat(story-summary): make vector APIs configurable

2026-04-03 15:31:13 +08:00
parent 5424dae2d6
commit af7e0f689d
9 changed files with 468 additions and 187 deletions
--- a/modules/story-summary/vector/llm/llm-service.js
+++ b/modules/story-summary/vector/llm/llm-service.js
@@ -2,14 +2,15 @@
 // vector/llm/llm-service.js - 修复 prefill 传递方式
 // ═══════════════════════════════════════════════════════════════════════════
 import { xbLog } from '../../../../core/debug-core.js';
-import { getApiKey } from './siliconflow.js';
+import { getVectorConfig } from '../../data/config.js';

 const MODULE_ID = 'vector-llm-service';
-const SILICONFLOW_API_URL = 'https://api.siliconflow.cn/v1';
 const DEFAULT_L0_MODEL = 'Qwen/Qwen3-8B';
+const DEFAULT_L0_API_URL = 'https://api.siliconflow.cn/v1'; // fallback base URL used when the configured L0 url is empty

 let callCounter = 0;
 const activeL0SessionIds = new Set();
+let l0KeyIndex = 0; // round-robin cursor read and advanced by getNextKey

 function getStreamingModule() {
    const mod = window.xiaobaixStreamingGeneration;
@@ -28,6 +29,28 @@ function b64UrlEncode(str) {
    return btoa(bin).replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, '');
 }

+// Resolve the L0 LLM endpoint settings from the vector config.
+// A truthy `l0Api` is returned as-is; otherwise SiliconFlow defaults with an empty key.
+function getL0ApiConfig() {
+    const configured = (getVectorConfig() || {}).l0Api;
+    if (configured) {
+        return configured;
+    }
+    return { provider: 'siliconflow', url: DEFAULT_L0_API_URL, key: '', model: DEFAULT_L0_MODEL };
+}
+
+// Pick the next API key, round-robin, from a list separated by commas, semicolons, pipes or newlines.
+// Returns '' when no key is present; a single key is returned without advancing l0KeyIndex.
+function getNextKey(rawKey) {
+    const candidates = String(rawKey || '').split(/[,;|\n]+/).map(part => part.trim()).filter(Boolean);
+    const total = candidates.length;
+    if (total === 0) return '';
+    if (total === 1) return candidates[0];
+    const slot = l0KeyIndex % total;
+    l0KeyIndex = (l0KeyIndex + 1) % total;
+    return candidates[slot];
+}
+
 /**
 * 统一LLM调用 - 走酒馆后端（非流式）
 * 临时改为标准 messages 调用，避免 bottomassistant prefill 兼容性问题。
@@ -42,7 +65,8 @@ export async function callLLM(messages, options = {}) {
    const mod = getStreamingModule();
    if (!mod) throw new Error('Streaming module not ready');

-    const apiKey = getApiKey() || '';
+    const apiCfg = getL0ApiConfig();
+    const apiKey = getNextKey(apiCfg.key);
    if (!apiKey) {
        throw new Error('L0 requires siliconflow API key');
    }
@@ -60,11 +84,11 @@ export async function callLLM(messages, options = {}) {
        temperature: String(temperature),
        max_tokens: String(max_tokens),
        api: 'openai',
-        apiurl: SILICONFLOW_API_URL,
+        apiurl: String(apiCfg.url || DEFAULT_L0_API_URL).trim(),
        apipassword: apiKey,
-        model: DEFAULT_L0_MODEL,
+        model: String(apiCfg.model || DEFAULT_L0_MODEL).trim(),
    };
-    const isQwen3 = String(DEFAULT_L0_MODEL || '').includes('Qwen3');
+    const isQwen3 = String(args.model || '').includes('Qwen3');
    if (isQwen3) {
        args.enable_thinking = 'false';
    }
--- a/modules/story-summary/vector/llm/reranker.js
+++ b/modules/story-summary/vector/llm/reranker.js
@@ -4,15 +4,38 @@
 // ═══════════════════════════════════════════════════════════════════════════

 import { xbLog } from '../../../../core/debug-core.js';
-import { getApiKey } from './siliconflow.js';
+import { getVectorConfig } from '../../data/config.js';

 const MODULE_ID = 'reranker';
-const RERANK_URL = 'https://api.siliconflow.cn/v1/rerank';
+const DEFAULT_RERANK_URL = 'https://api.siliconflow.cn/v1'; // base URL only; the '/rerank' path is appended when the request is built
 const RERANK_MODEL = 'BAAI/bge-reranker-v2-m3';
 const DEFAULT_TIMEOUT = 15000;
 const MAX_DOCUMENTS = 100;  // API 限制
 const RERANK_BATCH_SIZE = 20;
 const RERANK_MAX_CONCURRENCY = 5;
+let rerankKeyIndex = 0; // round-robin cursor read and advanced by getNextRerankKey
+
+// Resolve the rerank endpoint settings from the vector config.
+// A truthy `rerankApi` is returned as-is; otherwise SiliconFlow defaults with an empty key.
+function getRerankApiConfig() {
+    const configured = (getVectorConfig() || {}).rerankApi;
+    if (configured) {
+        return configured;
+    }
+    return { provider: 'siliconflow', url: DEFAULT_RERANK_URL, key: '', model: RERANK_MODEL };
+}
+
+// Pick the next rerank API key, round-robin, from a list separated by commas, semicolons, pipes or newlines.
+// Returns '' when no key is present; a single key is returned without advancing rerankKeyIndex.
+function getNextRerankKey(rawKey) {
+    const candidates = String(rawKey || '').split(/[,;|\n]+/).map(part => part.trim()).filter(Boolean);
+    const total = candidates.length;
+    if (total === 0) return '';
+    if (total === 1) return candidates[0];
+    const slot = rerankKeyIndex % total;
+    rerankKeyIndex = (rerankKeyIndex + 1) % total;
+    return candidates[slot];
+}

 /**
 * 对文档列表进行 Rerank 精排
@@ -37,7 +60,8 @@ export async function rerank(query, documents, options = {}) {
        return { results: [], failed: false };
    }

-    const key = getApiKey();
+    const apiCfg = getRerankApiConfig();
+    const key = getNextRerankKey(apiCfg.key);
    if (!key) {
        xbLog.warn(MODULE_ID, '未配置 API Key，跳过 rerank');
        return { results: documents.map((_, i) => ({ index: i, relevance_score: 0 })), failed: true };
@@ -72,14 +96,15 @@ export async function rerank(query, documents, options = {}) {
    try {
        const T0 = performance.now();

-        const response = await fetch(RERANK_URL, {
+        const baseUrl = String(apiCfg.url || DEFAULT_RERANK_URL).replace(/\/+$/, '');
+        const response = await fetch(`${baseUrl}/rerank`, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${key}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
-                model: RERANK_MODEL,
+                model: String(apiCfg.model || RERANK_MODEL),
                // Zero-darkbox: do not silently truncate query.
                query,
                documents: validDocs,
--- a/modules/story-summary/vector/llm/siliconflow.js
+++ b/modules/story-summary/vector/llm/siliconflow.js
@@ -1,31 +1,39 @@
 // ═══════════════════════════════════════════════════════════════════════════
-// siliconflow.js - Embedding + 多 Key 轮询
+// siliconflow.js - OpenAI-compatible Embedding + 多 Key 轮询
 //
 // 在 API Key 输入框中用逗号、分号、竖线或换行分隔多个 Key，例如：
 //   sk-aaa,sk-bbb,sk-ccc
 // 每次调用自动轮询到下一个 Key，并发请求会均匀分布到所有 Key 上。
 // ═══════════════════════════════════════════════════════════════════════════

+import { getVectorConfig } from '../../data/config.js';
+
 const BASE_URL = 'https://api.siliconflow.cn';
 const EMBEDDING_MODEL = 'BAAI/bge-m3';

 // ★ 多 Key 轮询状态
 let _keyIndex = 0;

+// Resolve the embedding endpoint settings from the vector config.
+// A truthy `embeddingApi` is returned as-is; otherwise SiliconFlow defaults with an empty key.
+function getEmbeddingApiConfig() {
+    const configured = (getVectorConfig() || {}).embeddingApi;
+    if (configured) {
+        return configured;
+    }
+    return { provider: 'siliconflow', url: `${BASE_URL}/v1`, key: '', model: EMBEDDING_MODEL };
+}
+
 /**
 * 从 localStorage 解析所有 Key（支持逗号、分号、竖线、换行分隔）
 */
-function parseKeys() {
+function parseKeys(rawKey) {
     try {
-        const raw = localStorage.getItem('summary_panel_config');
-        if (raw) {
-            const parsed = JSON.parse(raw);
-            const keyStr = parsed.vector?.online?.key || '';
-            return keyStr
-                .split(/[,;|\n]+/)
-                .map(k => k.trim())
-                .filter(k => k.length > 0);
-        }
+        // rawKey comes from the caller; null, undefined and '' all produce an empty list.
+        const segments = String(rawKey || '').split(/[,;|\n]+/);
+        // Trim every segment, then discard the ones that end up empty.
+        const trimmed = segments.map(segment => segment.trim());
+        return trimmed.filter(segment => segment.length > 0);
     } catch { }
     return [];
 }
@@ -34,8 +42,8 @@ function parseKeys() {
 * 获取下一个可用的 API Key（轮询）
 * 每次调用返回不同的 Key，自动循环
 */
-export function getApiKey() {
-    const keys = parseKeys();
+export function getApiKey(rawKey = null) {
+    const keys = parseKeys(rawKey ?? getEmbeddingApiConfig().key);
    if (!keys.length) return null;
    if (keys.length === 1) return keys[0];

@@ -51,7 +59,7 @@ export function getApiKey() {
 * 获取当前配置的 Key 数量（供外部模块动态调整并发用）
 */
 export function getKeyCount() {
-    return Math.max(1, parseKeys().length);
+    return Math.max(1, parseKeys(getEmbeddingApiConfig().key).length); // counts the embedding API keys; never reports fewer than 1
 }

 // ═══════════════════════════════════════════════════════════════════════════
@@ -61,22 +69,24 @@ export function getKeyCount() {
 export async function embed(texts, options = {}) {
    if (!texts?.length) return [];

-    const key = getApiKey();
-    if (!key) throw new Error('未配置硅基 API Key');
+    const apiCfg = options.apiConfig || getEmbeddingApiConfig();
+    const key = getApiKey(apiCfg.key);
+    if (!key) throw new Error('未配置 Embedding API Key');

    const { timeout = 30000, signal } = options;
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeout);

    try {
-        const response = await fetch(`${BASE_URL}/v1/embeddings`, {
+        const baseUrl = String(apiCfg.url || `${BASE_URL}/v1`).replace(/\/+$/, '');
+        const response = await fetch(`${baseUrl}/embeddings`, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${key}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
-                model: EMBEDDING_MODEL,
+                model: String(apiCfg.model || EMBEDDING_MODEL),
                input: texts,
            }),
            signal: signal || controller.signal,
--- a/modules/story-summary/vector/pipeline/state-integration.js
+++ b/modules/story-summary/vector/pipeline/state-integration.js
@@ -29,7 +29,7 @@ import { filterText } from '../utils/text-filter.js';
 const MODULE_ID = 'state-integration';

 // ★ 并发配置
-const CONCURRENCY = 10;
+const DEFAULT_CONCURRENCY = 10; // fallback pool size when vectorCfg.l0Concurrency is missing, zero or not numeric
 const STAGGER_DELAY = 15;
 const DEBUG_CONCURRENCY = true;
 const R_AGG_MAX_CHARS = 256;
@@ -168,7 +168,9 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
        return { built: 0 };
    }

-    xbLog.info(MODULE_ID, `增量 L0 提取：pending=${pendingPairs.length}, concurrency=${CONCURRENCY}`);
+    const concurrency = Math.max(1, Math.min(50, Number(vectorCfg?.l0Concurrency) || DEFAULT_CONCURRENCY));
+
+    xbLog.info(MODULE_ID, `增量 L0 提取：pending=${pendingPairs.length}, concurrency=${concurrency}`);

    let completed = 0;
    let failed = 0;
@@ -181,14 +183,6 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
    // ★ Phase 1: 收集所有新提取的 atoms（不向量化）
    const allNewAtoms = [];

-    // ★ 限流检测：连续失败 N 次后暂停并降速
-    let consecutiveFailures = 0;
-    let rateLimited = false;
-    const RATE_LIMIT_THRESHOLD = 6;       // 连续失败多少次触发限流保护
-    const RATE_LIMIT_WAIT_MS = 60000;      // 限流后等待时间（60 秒）
-    const RETRY_INTERVAL_MS = 1000;        // 降速模式下每次请求间隔（1 秒）
-    const RETRY_CONCURRENCY = 1;           // ★ 降速模式下的并发数（默认1，建议不要超过5）
-
    // ★ 通用处理单个 pair 的逻辑（复用于正常模式和降速模式）
    const processPair = async (pair, idx, workerId) => {
        const floor = pair.aiFloor;
@@ -209,9 +203,6 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
                throw new Error('llm_failed');
            }

-            // ★ 成功：重置连续失败计数
-            consecutiveFailures = 0;
-
            if (!atoms.length) {
                setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
            } else {
@@ -231,13 +222,6 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
                reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120),
            });
            failed++;
-
-            // ★ 限流检测：连续失败累加
-            consecutiveFailures++;
-            if (consecutiveFailures >= RATE_LIMIT_THRESHOLD && !rateLimited) {
-                rateLimited = true;
-                xbLog.warn(MODULE_ID, `连续失败 ${consecutiveFailures} 次，疑似触发 API 限流，将暂停所有并发`);
-            }
        } finally {
            active--;
            if (!extractionCancelled) {
@@ -252,12 +236,12 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
    };

    // ★ 并发池处理（保持固定并发度）
-    const poolSize = Math.min(CONCURRENCY, pendingPairs.length);
+    const poolSize = Math.min(concurrency, pendingPairs.length);
    let nextIndex = 0;
    let started = 0;
    const runWorker = async (workerId) => {
        while (true) {
-            if (extractionCancelled || rateLimited) return;
+            if (extractionCancelled) return;
            const idx = nextIndex++;
            if (idx >= pendingPairs.length) return;

@@ -267,7 +251,7 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
                await new Promise(r => setTimeout(r, stagger * STAGGER_DELAY));
            }

-            if (extractionCancelled || rateLimited) return;
+            if (extractionCancelled) return;

            await processPair(pair, idx, workerId);
        }
@@ -279,61 +263,6 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
        xbLog.info(MODULE_ID, `L0 pool done completed=${completed}/${total} failed=${failed} peakActive=${peakActive} elapsedMs=${elapsed}`);
    }

-    // ═════════════════════════════════════════════════════════════════════
-    // ★ 限流恢复：重置进度，从头开始以限速模式慢慢跑
-    // ═════════════════════════════════════════════════════════════════════
-    if (rateLimited && !extractionCancelled) {
-        const waitSec = RATE_LIMIT_WAIT_MS / 1000;
-        xbLog.info(MODULE_ID, `限流保护：将重置进度并从头开始降速重来（并发=${RETRY_CONCURRENCY}, 间隔=${RETRY_INTERVAL_MS}ms）`);
-        onProgress?.(`疑似限流，${waitSec}s 后降速重头开始...`, completed, total);
-
-        await new Promise(r => setTimeout(r, RATE_LIMIT_WAIT_MS));
-
-        if (!extractionCancelled) {
-            // ★ 核心逻辑：重置计数器，让 UI 从 0 开始跑，给用户“重头开始”的反馈
-            rateLimited = false;
-            consecutiveFailures = 0;
-            completed = 0;
-            failed = 0;
-
-            let retryNextIdx = 0;
-
-            xbLog.info(MODULE_ID, `限流恢复：开始降速模式扫描 ${pendingPairs.length} 个楼层`);
-
-            const retryWorkers = Math.min(RETRY_CONCURRENCY, pendingPairs.length);
-            const runRetryWorker = async (wid) => {
-                while (true) {
-                    if (extractionCancelled) return;
-                    const idx = retryNextIdx++;
-                    if (idx >= pendingPairs.length) return;
-
-                    const pair = pendingPairs[idx];
-                    const floor = pair.aiFloor;
-
-                    // ★ 检查该楼层状态
-                    const st = getL0FloorStatus(floor);
-                    if (st?.status === 'ok' || st?.status === 'empty') {
-                        // 刚才已经成功了，直接跳过（仅增加进度计数）
-                        completed++;
-                        onProgress?.(`提取: ${completed}/${total} (跳过已完成)`, completed, total);
-                        continue;
-                    }
-
-                    // ★ 没做过的，用 slow 模式处理
-                    await processPair(pair, idx, `retry-${wid}`);
-
-                    // 每个请求后休息，避免再次触发限流
-                    if (idx < pendingPairs.length - 1 && RETRY_INTERVAL_MS > 0) {
-                        await new Promise(r => setTimeout(r, RETRY_INTERVAL_MS));
-                    }
-                }
-            };
-
-            await Promise.all(Array.from({ length: retryWorkers }, (_, i) => runRetryWorker(i)));
-            xbLog.info(MODULE_ID, `降速重头开始阶段结束`);
-        }
-    }
-
    try {
        saveMetadataDebounced?.();
    } catch { }
--- a/modules/story-summary/vector/utils/embedder.js
+++ b/modules/story-summary/vector/utils/embedder.js
@@ -1,15 +1,13 @@
 // ═══════════════════════════════════════════════════════════════════════════
-// Story Summary - Embedder (v2 - 统一硅基)
-// 所有 embedding 请求转发到 siliconflow.js
+// Story Summary - Embedder
 // ═══════════════════════════════════════════════════════════════════════════

-import { embed as sfEmbed, getApiKey } from '../llm/siliconflow.js';
+import { embed as sfEmbed } from '../llm/siliconflow.js';
 // ═══════════════════════════════════════════════════════════════════════════
 // 统一 embed 接口
 // ═══════════════════════════════════════════════════════════════════════════

 export async function embed(texts, config, options = {}) {
-    // 忽略旧的 config 参数，统一走硅基
    return await sfEmbed(texts, options);
 }

@@ -18,8 +16,10 @@ export async function embed(texts, config, options = {}) {
 // ═══════════════════════════════════════════════════════════════════════════

 export function getEngineFingerprint(config) {
-    // 统一使用硅基 bge-m3
-    return 'siliconflow:bge-m3:1024';
+    const api = config?.embeddingApi || {}; // tolerate a missing config or a missing embeddingApi entry
+    const provider = String(api.provider || 'siliconflow').toLowerCase(); // lower-cased but not trimmed
+    const model = String(api.model || 'BAAI/bge-m3').trim() || 'BAAI/bge-m3'; // a whitespace-only model falls back to the default
+    return `${provider}:${model}:1024`; // NOTE(review): the dimension stays 1024 whatever model is configured, and the default result is now 'siliconflow:BAAI/bge-m3:1024' instead of 'siliconflow:bge-m3:1024' — confirm both are intended
 }

 // ═══════════════════════════════════════════════════════════════════════════
@@ -47,14 +47,13 @@ export async function deleteLocalModelCache() { }
 // 在线服务测试
 // ═══════════════════════════════════════════════════════════════════════════

-export async function testOnlineService() {
-    const key = getApiKey();
-    if (!key) {
-        throw new Error('请配置硅基 API Key');
+export async function testOnlineService(_provider, config = {}) {
+    if (!config?.key) {
+        throw new Error('请配置 Embedding API Key');
    }

    try {
-        const [vec] = await sfEmbed(['测试连接']);
+        const [vec] = await sfEmbed(['测试连接'], { apiConfig: config });
        return { success: true, dims: vec?.length || 0 };
    } catch (e) {
        throw new Error(`连接失败: ${e.message}`);
@@ -62,7 +61,6 @@ export async function testOnlineService() {
 }

 export async function fetchOnlineModels() {
-    // 硅基模型固定
    return ['BAAI/bge-m3'];
 }

@@ -78,6 +76,6 @@ export const ONLINE_PROVIDERS = {
    siliconflow: {
        id: 'siliconflow',
        name: '硅基流动',
-        baseUrl: 'https://api.siliconflow.cn',
+        baseUrl: 'https://api.siliconflow.cn/v1',
    },
 };