feat(story-summary): make vector APIs configurable
This commit is contained in:
@@ -2,14 +2,15 @@
|
||||
// vector/llm/llm-service.js - 修复 prefill 传递方式
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import { getApiKey } from './siliconflow.js';
|
||||
import { getVectorConfig } from '../../data/config.js';
|
||||
|
||||
const MODULE_ID = 'vector-llm-service';
|
||||
const SILICONFLOW_API_URL = 'https://api.siliconflow.cn/v1';
|
||||
const DEFAULT_L0_MODEL = 'Qwen/Qwen3-8B';
|
||||
const DEFAULT_L0_API_URL = 'https://api.siliconflow.cn/v1';
|
||||
|
||||
let callCounter = 0;
|
||||
const activeL0SessionIds = new Set();
|
||||
let l0KeyIndex = 0;
|
||||
|
||||
function getStreamingModule() {
|
||||
const mod = window.xiaobaixStreamingGeneration;
|
||||
@@ -28,6 +29,28 @@ function b64UrlEncode(str) {
|
||||
return btoa(bin).replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, '');
|
||||
}
|
||||
|
||||
/**
 * Resolve the API settings used for L0 LLM calls.
 *
 * Returns the `l0Api` entry of the vector config when one is present;
 * otherwise returns a fresh default object (empty key, default URL and model).
 *
 * @returns {object} The configured `l0Api` value, or the built-in defaults.
 */
function getL0ApiConfig() {
    const vectorCfg = getVectorConfig() || {};
    if (vectorCfg.l0Api) {
        return vectorCfg.l0Api;
    }
    return {
        provider: 'siliconflow',
        url: DEFAULT_L0_API_URL,
        key: '',
        model: DEFAULT_L0_MODEL,
    };
}
|
||||
|
||||
/**
 * Pick the API key to use for the next L0 call.
 *
 * The raw value may hold several keys separated by commas, semicolons,
 * vertical bars or newlines. With more than one key, successive calls rotate
 * through them using the module-level `l0KeyIndex` cursor.
 *
 * @param {*} rawKey - Raw key text; falsy values are treated as empty.
 * @returns {string} The selected key, or '' when no key is available.
 */
function getNextKey(rawKey) {
    const candidates = [];
    for (const piece of String(rawKey || '').split(/[,;|\n]+/)) {
        const trimmed = piece.trim();
        if (trimmed) {
            candidates.push(trimmed);
        }
    }

    switch (candidates.length) {
        case 0:
            return '';
        case 1:
            // A single key never touches the rotation cursor.
            return candidates[0];
        default: {
            const slot = l0KeyIndex % candidates.length;
            l0KeyIndex = (l0KeyIndex + 1) % candidates.length;
            return candidates[slot];
        }
    }
}
|
||||
|
||||
/**
|
||||
* 统一LLM调用 - 走酒馆后端(非流式)
|
||||
* 临时改为标准 messages 调用,避免 bottomassistant prefill 兼容性问题。
|
||||
@@ -42,7 +65,8 @@ export async function callLLM(messages, options = {}) {
|
||||
const mod = getStreamingModule();
|
||||
if (!mod) throw new Error('Streaming module not ready');
|
||||
|
||||
const apiKey = getApiKey() || '';
|
||||
const apiCfg = getL0ApiConfig();
|
||||
const apiKey = getNextKey(apiCfg.key);
|
||||
if (!apiKey) {
|
||||
throw new Error('L0 requires siliconflow API key');
|
||||
}
|
||||
@@ -60,11 +84,11 @@ export async function callLLM(messages, options = {}) {
|
||||
temperature: String(temperature),
|
||||
max_tokens: String(max_tokens),
|
||||
api: 'openai',
|
||||
apiurl: SILICONFLOW_API_URL,
|
||||
apiurl: String(apiCfg.url || DEFAULT_L0_API_URL).trim(),
|
||||
apipassword: apiKey,
|
||||
model: DEFAULT_L0_MODEL,
|
||||
model: String(apiCfg.model || DEFAULT_L0_MODEL).trim(),
|
||||
};
|
||||
const isQwen3 = String(DEFAULT_L0_MODEL || '').includes('Qwen3');
|
||||
const isQwen3 = String(args.model || '').includes('Qwen3');
|
||||
if (isQwen3) {
|
||||
args.enable_thinking = 'false';
|
||||
}
|
||||
|
||||
@@ -4,15 +4,38 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { xbLog } from '../../../../core/debug-core.js';
|
||||
import { getApiKey } from './siliconflow.js';
|
||||
import { getVectorConfig } from '../../data/config.js';
|
||||
|
||||
const MODULE_ID = 'reranker';
|
||||
const RERANK_URL = 'https://api.siliconflow.cn/v1/rerank';
|
||||
const DEFAULT_RERANK_URL = 'https://api.siliconflow.cn/v1';
|
||||
const RERANK_MODEL = 'BAAI/bge-reranker-v2-m3';
|
||||
const DEFAULT_TIMEOUT = 15000;
|
||||
const MAX_DOCUMENTS = 100; // API 限制
|
||||
const RERANK_BATCH_SIZE = 20;
|
||||
const RERANK_MAX_CONCURRENCY = 5;
|
||||
let rerankKeyIndex = 0;
|
||||
|
||||
/**
 * Resolve the API settings used for rerank requests.
 *
 * Returns the `rerankApi` entry of the vector config when one is present;
 * otherwise returns a fresh default object (empty key, default URL and model).
 *
 * @returns {object} The configured `rerankApi` value, or the built-in defaults.
 */
function getRerankApiConfig() {
    const vectorCfg = getVectorConfig() || {};
    const configured = vectorCfg.rerankApi;
    if (configured) {
        return configured;
    }
    const fallback = {
        provider: 'siliconflow',
        url: DEFAULT_RERANK_URL,
        key: '',
        model: RERANK_MODEL,
    };
    return fallback;
}
|
||||
|
||||
/**
 * Pick the API key to use for the next rerank request.
 *
 * The raw value may hold several keys separated by commas, semicolons,
 * vertical bars or newlines. With more than one key, successive calls rotate
 * through them using the module-level `rerankKeyIndex` cursor.
 *
 * @param {*} rawKey - Raw key text; falsy values are treated as empty.
 * @returns {string} The selected key, or '' when no key is available.
 */
function getNextRerankKey(rawKey) {
    const source = String(rawKey || '');
    const pool = source.split(/[,;|\n]+/).reduce((acc, part) => {
        const cleaned = part.trim();
        return cleaned ? [...acc, cleaned] : acc;
    }, []);

    const total = pool.length;
    if (total === 0) {
        return '';
    }
    if (total > 1) {
        const position = rerankKeyIndex % total;
        rerankKeyIndex = (rerankKeyIndex + 1) % total;
        return pool[position];
    }
    // Exactly one key: return it without advancing the cursor.
    return pool[0];
}
|
||||
|
||||
/**
|
||||
* 对文档列表进行 Rerank 精排
|
||||
@@ -37,7 +60,8 @@ export async function rerank(query, documents, options = {}) {
|
||||
return { results: [], failed: false };
|
||||
}
|
||||
|
||||
const key = getApiKey();
|
||||
const apiCfg = getRerankApiConfig();
|
||||
const key = getNextRerankKey(apiCfg.key);
|
||||
if (!key) {
|
||||
xbLog.warn(MODULE_ID, '未配置 API Key,跳过 rerank');
|
||||
return { results: documents.map((_, i) => ({ index: i, relevance_score: 0 })), failed: true };
|
||||
@@ -72,14 +96,15 @@ export async function rerank(query, documents, options = {}) {
|
||||
try {
|
||||
const T0 = performance.now();
|
||||
|
||||
const response = await fetch(RERANK_URL, {
|
||||
const baseUrl = String(apiCfg.url || DEFAULT_RERANK_URL).replace(/\/+$/, '');
|
||||
const response = await fetch(`${baseUrl}/rerank`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${key}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: RERANK_MODEL,
|
||||
model: String(apiCfg.model || RERANK_MODEL),
|
||||
// Zero-darkbox: do not silently truncate query.
|
||||
query,
|
||||
documents: validDocs,
|
||||
|
||||
@@ -1,31 +1,39 @@
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// siliconflow.js - Embedding + 多 Key 轮询
|
||||
// siliconflow.js - OpenAI-compatible Embedding + 多 Key 轮询
|
||||
//
|
||||
// 在 API Key 输入框中用逗号、分号、竖线或换行分隔多个 Key,例如:
|
||||
// sk-aaa,sk-bbb,sk-ccc
|
||||
// 每次调用自动轮询到下一个 Key,并发请求会均匀分布到所有 Key 上。
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
import { getVectorConfig } from '../../data/config.js';
|
||||
|
||||
const BASE_URL = 'https://api.siliconflow.cn';
|
||||
const EMBEDDING_MODEL = 'BAAI/bge-m3';
|
||||
|
||||
// ★ 多 Key 轮询状态
|
||||
let _keyIndex = 0;
|
||||
|
||||
/**
 * Resolve the API settings used for embedding requests.
 *
 * Returns the `embeddingApi` entry of the vector config when one is present;
 * otherwise returns a fresh default object (empty key, default URL and model).
 *
 * @returns {object} The configured `embeddingApi` value, or the built-in defaults.
 */
function getEmbeddingApiConfig() {
    const { embeddingApi } = getVectorConfig() || {};
    if (embeddingApi) {
        return embeddingApi;
    }
    return {
        provider: 'siliconflow',
        url: `${BASE_URL}/v1`,
        key: '',
        model: EMBEDDING_MODEL,
    };
}
|
||||
|
||||
/**
 * Split a raw API-key string into individual keys.
 *
 * Keys may be separated by commas, semicolons, vertical bars or newlines.
 * Surrounding whitespace is trimmed and empty entries are dropped.
 *
 * @param {*} rawKey - Raw key text holding one or more keys; falsy values
 *   are treated as an empty string.
 * @returns {string[]} The parsed keys; empty when nothing usable was supplied.
 */
function parseKeys(rawKey) {
    let keyStr;
    try {
        keyStr = String(rawKey || '');
    } catch {
        // Best-effort: a value that cannot be converted to a string yields no keys.
        return [];
    }
    return keyStr
        .split(/[,;|\n]+/)
        .map(k => k.trim())
        .filter(k => k.length > 0);
}
|
||||
@@ -34,8 +42,8 @@ function parseKeys() {
|
||||
* 获取下一个可用的 API Key(轮询)
|
||||
* 每次调用返回不同的 Key,自动循环
|
||||
*/
|
||||
export function getApiKey() {
|
||||
const keys = parseKeys();
|
||||
export function getApiKey(rawKey = null) {
|
||||
const keys = parseKeys(rawKey ?? getEmbeddingApiConfig().key);
|
||||
if (!keys.length) return null;
|
||||
if (keys.length === 1) return keys[0];
|
||||
|
||||
@@ -51,7 +59,7 @@ export function getApiKey() {
|
||||
* 获取当前配置的 Key 数量(供外部模块动态调整并发用)
|
||||
*/
|
||||
/**
 * Report how many embedding API keys are currently configured.
 *
 * Reads the key text from the embedding API config and counts the parsed
 * entries. The result is never lower than 1.
 *
 * @returns {number} Number of parsed keys, with a minimum of 1.
 */
export function getKeyCount() {
    // parseKeys must receive the configured key text; calling it with no
    // argument parses an empty string and would always report a count of 1.
    return Math.max(1, parseKeys(getEmbeddingApiConfig().key).length);
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
@@ -61,22 +69,24 @@ export function getKeyCount() {
|
||||
export async function embed(texts, options = {}) {
|
||||
if (!texts?.length) return [];
|
||||
|
||||
const key = getApiKey();
|
||||
if (!key) throw new Error('未配置硅基 API Key');
|
||||
const apiCfg = options.apiConfig || getEmbeddingApiConfig();
|
||||
const key = getApiKey(apiCfg.key);
|
||||
if (!key) throw new Error('未配置 Embedding API Key');
|
||||
|
||||
const { timeout = 30000, signal } = options;
|
||||
const controller = new AbortController();
|
||||
const timeoutId = setTimeout(() => controller.abort(), timeout);
|
||||
|
||||
try {
|
||||
const response = await fetch(`${BASE_URL}/v1/embeddings`, {
|
||||
const baseUrl = String(apiCfg.url || `${BASE_URL}/v1`).replace(/\/+$/, '');
|
||||
const response = await fetch(`${baseUrl}/embeddings`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${key}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: EMBEDDING_MODEL,
|
||||
model: String(apiCfg.model || EMBEDDING_MODEL),
|
||||
input: texts,
|
||||
}),
|
||||
signal: signal || controller.signal,
|
||||
|
||||
Reference in New Issue
Block a user