feat(story-summary): make vector APIs configurable

This commit is contained in:
2026-04-03 15:31:13 +08:00
parent 5424dae2d6
commit af7e0f689d
9 changed files with 468 additions and 187 deletions

View File

@@ -2,14 +2,15 @@
// vector/llm/llm-service.js - 修复 prefill 传递方式
// ═══════════════════════════════════════════════════════════════════════════
import { xbLog } from '../../../../core/debug-core.js';
import { getApiKey } from './siliconflow.js';
import { getVectorConfig } from '../../data/config.js';
const MODULE_ID = 'vector-llm-service';
const SILICONFLOW_API_URL = 'https://api.siliconflow.cn/v1';
const DEFAULT_L0_MODEL = 'Qwen/Qwen3-8B';
const DEFAULT_L0_API_URL = 'https://api.siliconflow.cn/v1';
let callCounter = 0;
const activeL0SessionIds = new Set();
let l0KeyIndex = 0;
function getStreamingModule() {
const mod = window.xiaobaixStreamingGeneration;
@@ -28,6 +29,28 @@ function b64UrlEncode(str) {
return btoa(bin).replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, '');
}
/**
 * Resolve the API settings used for L0 LLM calls.
 *
 * @returns {object} The `l0Api` entry of the vector config when it is truthy;
 *   otherwise a default descriptor (provider `siliconflow`, default URL and
 *   model, empty key).
 */
function getL0ApiConfig() {
    const vectorConfig = getVectorConfig() || {};
    const configured = vectorConfig.l0Api;
    if (configured) {
        return configured;
    }
    // Nothing configured: fall back to the module-level defaults.
    return {
        provider: 'siliconflow',
        url: DEFAULT_L0_API_URL,
        key: '',
        model: DEFAULT_L0_MODEL,
    };
}
/**
 * Pick the next API key from a raw multi-key string, rotating round-robin.
 *
 * Keys are separated by commas, semicolons, pipes or newlines; surrounding
 * whitespace is trimmed and empty entries are dropped.
 *
 * @param {*} rawKey - Raw key string (falsy values are treated as empty).
 * @returns {string} '' when no key is present, the sole key when there is
 *   exactly one, otherwise the key at the current rotation position.
 */
function getNextKey(rawKey) {
    const candidates = [];
    for (const piece of String(rawKey || '').split(/[,;|\n]+/)) {
        const trimmed = piece.trim();
        if (trimmed) {
            candidates.push(trimmed);
        }
    }

    const total = candidates.length;
    if (total === 0) return '';
    // A single key never touches the rotation counter.
    if (total === 1) return candidates[0];

    const picked = candidates[l0KeyIndex % total];
    l0KeyIndex = (l0KeyIndex + 1) % total;
    return picked;
}
/**
* 统一LLM调用 - 走酒馆后端(非流式)
* 临时改为标准 messages 调用,避免 bottomassistant prefill 兼容性问题。
@@ -42,7 +65,8 @@ export async function callLLM(messages, options = {}) {
const mod = getStreamingModule();
if (!mod) throw new Error('Streaming module not ready');
const apiKey = getApiKey() || '';
const apiCfg = getL0ApiConfig();
const apiKey = getNextKey(apiCfg.key);
if (!apiKey) {
throw new Error('L0 requires siliconflow API key');
}
@@ -60,11 +84,11 @@ export async function callLLM(messages, options = {}) {
temperature: String(temperature),
max_tokens: String(max_tokens),
api: 'openai',
apiurl: SILICONFLOW_API_URL,
apiurl: String(apiCfg.url || DEFAULT_L0_API_URL).trim(),
apipassword: apiKey,
model: DEFAULT_L0_MODEL,
model: String(apiCfg.model || DEFAULT_L0_MODEL).trim(),
};
const isQwen3 = String(DEFAULT_L0_MODEL || '').includes('Qwen3');
const isQwen3 = String(args.model || '').includes('Qwen3');
if (isQwen3) {
args.enable_thinking = 'false';
}

View File

@@ -4,15 +4,38 @@
// ═══════════════════════════════════════════════════════════════════════════
import { xbLog } from '../../../../core/debug-core.js';
import { getApiKey } from './siliconflow.js';
import { getVectorConfig } from '../../data/config.js';
const MODULE_ID = 'reranker';
const RERANK_URL = 'https://api.siliconflow.cn/v1/rerank';
const DEFAULT_RERANK_URL = 'https://api.siliconflow.cn/v1';
const RERANK_MODEL = 'BAAI/bge-reranker-v2-m3';
const DEFAULT_TIMEOUT = 15000;
const MAX_DOCUMENTS = 100; // API 限制
const RERANK_BATCH_SIZE = 20;
const RERANK_MAX_CONCURRENCY = 5;
let rerankKeyIndex = 0;
/**
 * Resolve the API settings used for rerank requests.
 *
 * @returns {object} The `rerankApi` entry of the vector config when it is
 *   truthy; otherwise a default descriptor (provider `siliconflow`, default
 *   URL and model, empty key).
 */
function getRerankApiConfig() {
    const { rerankApi } = getVectorConfig() || {};
    return rerankApi
        ? rerankApi
        : {
            provider: 'siliconflow',
            url: DEFAULT_RERANK_URL,
            key: '',
            model: RERANK_MODEL,
        };
}
/**
 * Pick the next rerank API key from a raw multi-key string (round-robin).
 *
 * Accepts comma, semicolon, pipe or newline separated keys; blank entries
 * are ignored after trimming.
 *
 * @param {*} rawKey - Raw key string (falsy values are treated as empty).
 * @returns {string} '' if there are no keys, the only key if there is one,
 *   otherwise the key at the current rotation slot.
 */
function getNextRerankKey(rawKey) {
    const pool = String(rawKey || '')
        .split(/[,;|\n]+/)
        .reduce((acc, part) => {
            const token = part.trim();
            if (token) acc.push(token);
            return acc;
        }, []);

    switch (pool.length) {
        case 0:
            return '';
        case 1:
            // Single key: the rotation counter is left untouched.
            return pool[0];
        default: {
            const slot = rerankKeyIndex % pool.length;
            rerankKeyIndex = (rerankKeyIndex + 1) % pool.length;
            return pool[slot];
        }
    }
}
/**
* 对文档列表进行 Rerank 精排
@@ -37,7 +60,8 @@ export async function rerank(query, documents, options = {}) {
return { results: [], failed: false };
}
const key = getApiKey();
const apiCfg = getRerankApiConfig();
const key = getNextRerankKey(apiCfg.key);
if (!key) {
xbLog.warn(MODULE_ID, '未配置 API Key跳过 rerank');
return { results: documents.map((_, i) => ({ index: i, relevance_score: 0 })), failed: true };
@@ -72,14 +96,15 @@ export async function rerank(query, documents, options = {}) {
try {
const T0 = performance.now();
const response = await fetch(RERANK_URL, {
const baseUrl = String(apiCfg.url || DEFAULT_RERANK_URL).replace(/\/+$/, '');
const response = await fetch(`${baseUrl}/rerank`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: RERANK_MODEL,
model: String(apiCfg.model || RERANK_MODEL),
// Zero-darkbox: do not silently truncate query.
query,
documents: validDocs,

View File

@@ -1,31 +1,39 @@
// ═══════════════════════════════════════════════════════════════════════════
// siliconflow.js - Embedding + 多 Key 轮询
// siliconflow.js - OpenAI-compatible Embedding + 多 Key 轮询
//
// 在 API Key 输入框中用逗号、分号、竖线或换行分隔多个 Key，例如：
// sk-aaa,sk-bbb,sk-ccc
// 每次调用自动轮询到下一个 Key，并发请求会均匀分布到所有 Key 上。
// ═══════════════════════════════════════════════════════════════════════════
import { getVectorConfig } from '../../data/config.js';
const BASE_URL = 'https://api.siliconflow.cn';
const EMBEDDING_MODEL = 'BAAI/bge-m3';
// ★ 多 Key 轮询状态
let _keyIndex = 0;
/**
 * Resolve the API settings used for embedding requests.
 *
 * @returns {object} The `embeddingApi` entry of the vector config when it is
 *   truthy; otherwise a default descriptor (provider `siliconflow`,
 *   `${BASE_URL}/v1`, default model, empty key).
 */
function getEmbeddingApiConfig() {
    const { embeddingApi } = getVectorConfig() || {};
    if (embeddingApi) {
        return embeddingApi;
    }
    const fallbackUrl = `${BASE_URL}/v1`;
    return {
        provider: 'siliconflow',
        url: fallbackUrl,
        key: '',
        model: EMBEDDING_MODEL,
    };
}
/**
 * Split a raw key string into individual API keys.
 *
 * Keys may be separated by commas, semicolons, pipes or newlines. Each entry
 * is trimmed and empty entries are discarded.
 *
 * @param {*} rawKey - Raw key string; falsy values are treated as empty.
 * @returns {string[]} Parsed keys in input order; an empty array when there
 *   are none or when the value cannot be converted to a string.
 */
function parseKeys(rawKey) {
    let keyStr;
    try {
        keyStr = String(rawKey || '');
    } catch {
        // Best-effort: a value that cannot be stringified yields no keys
        // instead of propagating a TypeError to callers.
        return [];
    }
    return keyStr
        .split(/[,;|\n]+/)
        .map((k) => k.trim())
        .filter((k) => k.length > 0);
}
@@ -34,8 +42,8 @@ function parseKeys() {
* 获取下一个可用的 API Key轮询
* 每次调用返回不同的 Key自动循环
*/
export function getApiKey() {
const keys = parseKeys();
export function getApiKey(rawKey = null) {
const keys = parseKeys(rawKey ?? getEmbeddingApiConfig().key);
if (!keys.length) return null;
if (keys.length === 1) return keys[0];
@@ -51,7 +59,7 @@ export function getApiKey() {
* 获取当前配置的 Key 数量(供外部模块动态调整并发用)
*/
export function getKeyCount() {
    // Count the keys of the configured embedding API. At least 1 is reported
    // so callers that size their concurrency from this value never get 0.
    const configuredKeys = parseKeys(getEmbeddingApiConfig().key);
    return Math.max(1, configuredKeys.length);
}
// ═══════════════════════════════════════════════════════════════════════════
@@ -61,22 +69,24 @@ export function getKeyCount() {
export async function embed(texts, options = {}) {
if (!texts?.length) return [];
const key = getApiKey();
if (!key) throw new Error('未配置硅基 API Key');
const apiCfg = options.apiConfig || getEmbeddingApiConfig();
const key = getApiKey(apiCfg.key);
if (!key) throw new Error('未配置 Embedding API Key');
const { timeout = 30000, signal } = options;
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeout);
try {
const response = await fetch(`${BASE_URL}/v1/embeddings`, {
const baseUrl = String(apiCfg.url || `${BASE_URL}/v1`).replace(/\/+$/, '');
const response = await fetch(`${baseUrl}/embeddings`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: EMBEDDING_MODEL,
model: String(apiCfg.model || EMBEDDING_MODEL),
input: texts,
}),
signal: signal || controller.signal,

View File

@@ -29,7 +29,7 @@ import { filterText } from '../utils/text-filter.js';
const MODULE_ID = 'state-integration';
// ★ 并发配置
const CONCURRENCY = 10;
const DEFAULT_CONCURRENCY = 10;
const STAGGER_DELAY = 15;
const DEBUG_CONCURRENCY = true;
const R_AGG_MAX_CHARS = 256;
@@ -168,7 +168,9 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
return { built: 0 };
}
xbLog.info(MODULE_ID, `增量 L0 提取pending=${pendingPairs.length}, concurrency=${CONCURRENCY}`);
const concurrency = Math.max(1, Math.min(50, Number(vectorCfg?.l0Concurrency) || DEFAULT_CONCURRENCY));
xbLog.info(MODULE_ID, `增量 L0 提取pending=${pendingPairs.length}, concurrency=${concurrency}`);
let completed = 0;
let failed = 0;
@@ -181,14 +183,6 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
// ★ Phase 1: 收集所有新提取的 atoms不向量化
const allNewAtoms = [];
// ★ 限流检测:连续失败 N 次后暂停并降速
let consecutiveFailures = 0;
let rateLimited = false;
const RATE_LIMIT_THRESHOLD = 6; // 连续失败多少次触发限流保护
const RATE_LIMIT_WAIT_MS = 60000; // 限流后等待时间60 秒)
const RETRY_INTERVAL_MS = 1000; // 降速模式下每次请求间隔1 秒)
const RETRY_CONCURRENCY = 1; // ★ 降速模式下的并发数默认1建议不要超过5
// ★ 通用处理单个 pair 的逻辑(复用于正常模式和降速模式)
const processPair = async (pair, idx, workerId) => {
const floor = pair.aiFloor;
@@ -209,9 +203,6 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
throw new Error('llm_failed');
}
// ★ 成功:重置连续失败计数
consecutiveFailures = 0;
if (!atoms.length) {
setL0FloorStatus(floor, { status: 'empty', reason: 'llm_empty', atoms: 0 });
} else {
@@ -231,13 +222,6 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
reason: String(e?.message || e).replace(/\s+/g, ' ').slice(0, 120),
});
failed++;
// ★ 限流检测:连续失败累加
consecutiveFailures++;
if (consecutiveFailures >= RATE_LIMIT_THRESHOLD && !rateLimited) {
rateLimited = true;
xbLog.warn(MODULE_ID, `连续失败 ${consecutiveFailures} 次,疑似触发 API 限流,将暂停所有并发`);
}
} finally {
active--;
if (!extractionCancelled) {
@@ -252,12 +236,12 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
};
// ★ 并发池处理(保持固定并发度)
const poolSize = Math.min(CONCURRENCY, pendingPairs.length);
const poolSize = Math.min(concurrency, pendingPairs.length);
let nextIndex = 0;
let started = 0;
const runWorker = async (workerId) => {
while (true) {
if (extractionCancelled || rateLimited) return;
if (extractionCancelled) return;
const idx = nextIndex++;
if (idx >= pendingPairs.length) return;
@@ -267,7 +251,7 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
await new Promise(r => setTimeout(r, stagger * STAGGER_DELAY));
}
if (extractionCancelled || rateLimited) return;
if (extractionCancelled) return;
await processPair(pair, idx, workerId);
}
@@ -279,61 +263,6 @@ export async function incrementalExtractAtoms(chatId, chat, onProgress, options
xbLog.info(MODULE_ID, `L0 pool done completed=${completed}/${total} failed=${failed} peakActive=${peakActive} elapsedMs=${elapsed}`);
}
// ═════════════════════════════════════════════════════════════════════
// ★ 限流恢复:重置进度,从头开始以限速模式慢慢跑
// ═════════════════════════════════════════════════════════════════════
if (rateLimited && !extractionCancelled) {
const waitSec = RATE_LIMIT_WAIT_MS / 1000;
xbLog.info(MODULE_ID, `限流保护:将重置进度并从头开始降速重来(并发=${RETRY_CONCURRENCY}, 间隔=${RETRY_INTERVAL_MS}ms`);
onProgress?.(`疑似限流,${waitSec}s 后降速重头开始...`, completed, total);
await new Promise(r => setTimeout(r, RATE_LIMIT_WAIT_MS));
if (!extractionCancelled) {
// ★ 核心逻辑:重置计数器,让 UI 从 0 开始跑,给用户“重头开始”的反馈
rateLimited = false;
consecutiveFailures = 0;
completed = 0;
failed = 0;
let retryNextIdx = 0;
xbLog.info(MODULE_ID, `限流恢复:开始降速模式扫描 ${pendingPairs.length} 个楼层`);
const retryWorkers = Math.min(RETRY_CONCURRENCY, pendingPairs.length);
const runRetryWorker = async (wid) => {
while (true) {
if (extractionCancelled) return;
const idx = retryNextIdx++;
if (idx >= pendingPairs.length) return;
const pair = pendingPairs[idx];
const floor = pair.aiFloor;
// ★ 检查该楼层状态
const st = getL0FloorStatus(floor);
if (st?.status === 'ok' || st?.status === 'empty') {
// 刚才已经成功了,直接跳过(仅增加进度计数)
completed++;
onProgress?.(`提取: ${completed}/${total} (跳过已完成)`, completed, total);
continue;
}
// ★ 没做过的,用 slow 模式处理
await processPair(pair, idx, `retry-${wid}`);
// 每个请求后休息,避免再次触发限流
if (idx < pendingPairs.length - 1 && RETRY_INTERVAL_MS > 0) {
await new Promise(r => setTimeout(r, RETRY_INTERVAL_MS));
}
}
};
await Promise.all(Array.from({ length: retryWorkers }, (_, i) => runRetryWorker(i)));
xbLog.info(MODULE_ID, `降速重头开始阶段结束`);
}
}
try {
saveMetadataDebounced?.();
} catch { }

View File

@@ -1,15 +1,13 @@
// ═══════════════════════════════════════════════════════════════════════════
// Story Summary - Embedder (v2 - 统一硅基)
// 所有 embedding 请求转发到 siliconflow.js
// Story Summary - Embedder
// ═══════════════════════════════════════════════════════════════════════════
import { embed as sfEmbed, getApiKey } from '../llm/siliconflow.js';
import { embed as sfEmbed } from '../llm/siliconflow.js';
// ═══════════════════════════════════════════════════════════════════════════
// 统一 embed 接口
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Unified embedding entry point; delegates to `sfEmbed`.
 *
 * @param {*} texts - Forwarded unchanged to `sfEmbed`.
 * @param {*} config - Accepted for call-site compatibility; not forwarded.
 * @param {object} [options={}] - Forwarded unchanged to `sfEmbed`.
 * @returns {Promise<*>} Whatever `sfEmbed` resolves to.
 */
export async function embed(texts, config, options = {}) {
    const vectors = await sfEmbed(texts, options);
    return vectors;
}
@@ -18,8 +16,10 @@ export async function embed(texts, config, options = {}) {
// ═══════════════════════════════════════════════════════════════════════════
/**
 * Build the fingerprint string identifying the embedding engine in use.
 *
 * @param {object} [config] - Vector config; only `config.embeddingApi`
 *   (`provider`, `model`) is read.
 * @returns {string} `<provider>:<model>:1024`, where the provider is
 *   lower-cased (default `siliconflow`) and the model is trimmed
 *   (default `BAAI/bge-m3`).
 */
export function getEngineFingerprint(config) {
    const api = config?.embeddingApi || {};
    const provider = String(api.provider || 'siliconflow').toLowerCase();
    // A whitespace-only model trims to '' and falls back to the default.
    const model = String(api.model || 'BAAI/bge-m3').trim() || 'BAAI/bge-m3';
    // NOTE(review): the dimension suffix is fixed at 1024 regardless of the
    // configured model — confirm this is intended for non-default models.
    return `${provider}:${model}:1024`;
}
// ═══════════════════════════════════════════════════════════════════════════
@@ -47,14 +47,13 @@ export async function deleteLocalModelCache() { }
// 在线服务测试
// ═══════════════════════════════════════════════════════════════════════════
export async function testOnlineService() {
const key = getApiKey();
if (!key) {
throw new Error('请配置硅基 API Key');
export async function testOnlineService(_provider, config = {}) {
if (!config?.key) {
throw new Error('请配置 Embedding API Key');
}
try {
const [vec] = await sfEmbed(['测试连接']);
const [vec] = await sfEmbed(['测试连接'], { apiConfig: config });
return { success: true, dims: vec?.length || 0 };
} catch (e) {
throw new Error(`连接失败: ${e.message}`);
@@ -62,7 +61,6 @@ export async function testOnlineService() {
}
/**
 * List the embedding models offered for selection.
 *
 * @returns {Promise<string[]>} A fixed single-entry list.
 */
export async function fetchOnlineModels() {
    const models = ['BAAI/bge-m3'];
    return models;
}
@@ -78,6 +76,6 @@ export const ONLINE_PROVIDERS = {
siliconflow: {
id: 'siliconflow',
name: '硅基流动',
baseUrl: 'https://api.siliconflow.cn',
baseUrl: 'https://api.siliconflow.cn/v1',
},
};